In [None]:
import pandas as pd
# Turn on pandas' copy-on-write mode for the whole notebook. Later cells assign
# to columns of frames that were derived from other frames by selection.
pd.options.mode.copy_on_write = True 

# Phase 1: Data Discovery

In [None]:
# Load the Springfield Azure resources CSV. The path is relative, so it is
# resolved against the kernel's current working directory.
raw = pd.read_csv("springfield_azure_resources.csv")

In [None]:
# Preview the first rows of the unmodified input.
raw.head()

In [None]:
# Columns carried forward into the cleansing phase; every other column of the
# raw frame is left behind.
columns = [
    'resource_name',
    'resource_type',
    'location',
    'owner',
    'environment',
    'subscription_id',
    'resource_group_name',
    'resource_id',
    'sku',
    'migration_wave',
]

data_we_care_about = raw.loc[:, columns]

In [None]:
# Preview the first ten rows of the column subset.
data_we_care_about.head(10)

In [None]:
# Summary statistics for the selected columns.
data_we_care_about.describe()

# Phase 2: Data Cleansing

## Evaluate Owners

In [None]:
# Count rows per distinct owner value.
data_we_care_about['owner'].value_counts()

## Check environments

In [None]:
# Count rows per raw environment label, before any normalisation is applied.
data_we_care_about['environment'].value_counts()

## Environment Cleaning

In [None]:
# Canonical environment name for each normalised (lower-cased, stripped,
# truncated to three characters) prefix of the raw label.
environment_map = {
#   'from':'to'
    'pro':'production',
    'tes':'test',
    'sta':'staging',
    'dev':'development',
    'uat':'test',
    'qa':'test'
}

# Normalise with the vectorised .str accessor instead of per-row lambdas.
# A missing environment (NaN from a blank CSV cell) now propagates as NaN
# rather than raising AttributeError on `.lower()`, and is then bucketed as
# 'other' together with every prefix that has no entry in environment_map.
data_we_care_about['environment'] = (
    data_we_care_about['environment']
    .str.lower()
    .str.strip()
    .str[:3]
    .map(environment_map)
    .fillna('other')
)

# Keep only the three environments listed below; rows labelled 'staging' or
# 'other' are excluded from everything downstream.
filtered_environments = data_we_care_about.loc[
    data_we_care_about['environment'].isin(['development', 'production', 'test'])
]


filtered_environments['environment'].value_counts()

In [None]:
# Preview the rows that survived the environment filter.
filtered_environments.head()

## Check Locations

In [None]:
# Count rows per raw location label, before any normalisation is applied.
filtered_environments['location'].value_counts()

## Location Cleaning

In [None]:
def _normalise_location(location_name):
    """Lower-case a location label, trim it, and remove spaces and hyphens."""
    cleaned = location_name.lower().strip()
    for unwanted in (" ", "-"):
        cleaned = cleaned.replace(unwanted, "")
    return cleaned


# Collapse spelling variants of the same location into one label.
filtered_environments['location'] = filtered_environments['location'].apply(_normalise_location)

filtered_environments['location'].value_counts()

## Wrangle migration wave

In [None]:
# Count rows per raw migration_wave label, before it is converted to a number.
filtered_environments['migration_wave'].value_counts()

In [None]:
# Reduce each migration_wave label to its integer wave number by lower-casing
# it and removing spaces and the word "wave".
# The column is cast to str first so the cell can be re-run: after one
# execution the column already holds ints, and calling `.lower()` on those
# (as the per-row lambda did) raises AttributeError.
filtered_environments['migration_wave'] = (
    filtered_environments['migration_wave']
    .astype(str)
    .str.lower()
    .str.replace(" ", "", regex=False)
    .str.replace("wave", "", regex=False)
    .astype(int)
)
filtered_environments['migration_wave'].value_counts()

## Business Rule 1: All locations must be 'uksouth'

In [None]:
# Location distribution immediately before the business rule is applied.
filtered_environments['location'].value_counts()

In [None]:
# Business Rule 1: overwrite the location of every remaining row with
# 'uksouth', regardless of the value it held after cleaning.
filtered_environments['location'] = 'uksouth'

In [None]:
# Location distribution after the overwrite; a single 'uksouth' bucket is expected.
filtered_environments['location'].value_counts()

In [None]:
# Snapshot the cleansed frame under its own name; DataFrame.copy() makes a
# deep copy by default.
clean_data = filtered_environments.copy()

## Business Rule 2: Consolidate resource ownership

### Data Discovery

In [None]:
# Resource counts with one row per environment and one column per owner.
owner_counts = clean_data.groupby(['environment', 'owner']).size()
pivot = owner_counts.unstack(fill_value=0)  # combinations with no rows become 0

print(pivot)


### Implementation

In [None]:
# Current owner -> owner who takes over their resources. Owners that do not
# appear as a key keep their resources unchanged.
owner_map = {
    'Apu': 'Marge',
    'Milhouse': 'Lisa',
    'Ned': 'Homer',
    'Bart': 'Homer',
}

clean_data['owner'] = clean_data['owner'].replace(owner_map)

In [None]:
# Rebuild the environment-by-owner count table now that owners are remapped.
grouping_keys = ['environment', 'owner']
pivot = clean_data.groupby(grouping_keys).size().unstack(fill_value=0)

print(pivot)


In [None]:
# Preview the cleansed data with the remapped owners.
clean_data.head()

In [None]:
from azure.identity import DefaultAzureCredential
from azure.mgmt.resource import ResourceManagementClient
from workshop.utilities import WorkshopUtilities


In [None]:
# Credential object handed to the management client below. It is constructed
# with no arguments, so whatever DefaultAzureCredential does by default applies.
credential = DefaultAzureCredential()


In [None]:
# Display the credential object's repr as the cell output.
credential

In [None]:
# get_token() needs at least one scope; calling it with none fails before any
# token is requested. Request a token for Azure Resource Manager, the API the
# ResourceManagementClient talks to.
ARM_SCOPE = "https://management.azure.com/.default"
access = credential.get_token(ARM_SCOPE)

# Do not print the bearer token itself: cell output is saved inside the
# notebook file and would leak a usable credential to anyone who can read it.
print(f"Token acquired; expires at epoch {access.expires_on}")

In [None]:
# Subscription that every management call in this notebook is issued against.
SUBSCRIPTION_ID = "springfield-sub-12345"
client = ResourceManagementClient(credential, SUBSCRIPTION_ID)


In [None]:
# Work on the first 20 rows only. NOTE(review): this is a positional slice,
# not a filter on environment == 'development' - confirm that is intended.
dev_df = clean_data.head(20)

In [None]:
# Preview the batch that will be sent to Azure.
dev_df.head()

In [None]:
# Create resource groups first
# Each distinct group name in the batch is created (or updated) once. A failure
# for one group is reported and the loop moves on to the next.
resource_groups = dev_df['resource_group_name'].unique()
for rg_name in resource_groups:
    rg_parameters = {
        "location": "uksouth",
        "tags": {"migration": "springfield"},
    }
    try:
        client.resource_groups.create_or_update(rg_name, rg_parameters)
        print(f"✓ Created resource group: {rg_name}")
    except Exception as error:
        print(f"✗ Failed to create {rg_name}: {error}")

In [None]:
# Prepare resources for bulk creation
# One payload dict per row of the batch. Owner, environment and migration wave
# travel as tags, with the wave number rendered as a string.
resources_data = [
    {
        'name': row['resource_name'],
        'resource_type': row['resource_type'],
        'resource_group': row['resource_group_name'],
        'location': 'uksouth',  # Standardized location
        'tags': {
            'owner': row['owner'],
            'environment': row['environment'],
            'migration_wave': str(row['migration_wave']),
        },
    }
    for _, row in dev_df.iterrows()
]

# Bulk create resources
print(f"\nCreating {len(resources_data)} resources...")

In [None]:
# Print each prepared payload on its own line for a visual check before the
# bulk-create cell runs.
for thing in resources_data:
    print(thing)

In [None]:

def progress_callback(tracker):
    """Redraw a single-line progress readout for the bulk creation.

    Reads ``percentage``, ``completed`` and ``failed`` from ``tracker``. The
    leading carriage return and ``end=''`` overwrite the previous readout
    instead of adding a new line per update.
    Presumably invoked by ``bulk_create_resources`` as work progresses -
    confirm against WorkshopUtilities.
    """
    print(f"\rProgress: {tracker.percentage:.1f}% ({tracker.completed} completed, {tracker.failed} failed)", 
        end='',
        flush=True
    )

tracker = WorkshopUtilities.bulk_create_resources(
    client, 
    resources_data, 
    progress_callback
)

print("\n\nMigration complete!")
print(f"Total time: {tracker.elapsed_time:.2f} seconds")
# Guard the ratio: an empty batch leaves total at 0, and the unguarded
# division would end the cell with ZeroDivisionError instead of a summary.
success_rate = (tracker.completed / tracker.total) * 100 if tracker.total else 0.0
print(f"Success rate: {success_rate:.1f}%")

In [None]:
# Find Homer's resources
print("\nSearching for Homer's resources...")
homer_resources = WorkshopUtilities.find_resources_by_owner(client, "Homer")
homer_resource_count = len(homer_resources)
print(f"Found {homer_resource_count} resources owned by Homer")

In [None]:
# Show breakdown by type
# Tally Homer's resources by the last '/'-separated segment of each resource's
# type string.
resource_types = {}
for homer_resource in homer_resources:
    short_type = homer_resource.type.split('/')[-1]
    if short_type not in resource_types:
        resource_types[short_type] = 0
    resource_types[short_type] += 1

print("\nResource breakdown:")
for short_type, type_count in resource_types.items():
    print(f"  - {short_type}: {type_count}")

In [None]:
# Transfer ownership
print("\nTransferring ownership to Marge...")

# Named separately from the bulk-creation callback so that running this cell
# does not silently replace `progress_callback` for the earlier cell.
def transfer_progress_callback(tracker):
    """Redraw a single-line progress readout for the ownership transfer.

    Reads ``percentage``, ``completed`` and ``total`` from ``tracker``. The
    leading carriage return and ``end=''`` overwrite the previous readout
    instead of adding a new line per update.
    """
    print(f"\rProgress: {tracker.percentage:.1f}% "
            f"({tracker.completed}/{tracker.total})", 
            end='', flush=True)

result = WorkshopUtilities.transfer_ownership(
    client,
    from_owner="Homer",
    to_owner="Marge",
    progress_callback=transfer_progress_callback
)

# `result` is read as a mapping with the four keys used below.
print("\n\nTransfer complete!")
print(f"  - Total resources: {result['total_resources']}")
print(f"  - Successfully transferred: {result['successfully_transferred']}")
print(f"  - Failed transfers: {result['failed_transfers']}")
print(f"  - Duration: {result['duration_seconds']:.2f} seconds")

# Generate audit report
print("\nGenerating compliance report...")
report = WorkshopUtilities.generate_compliance_report(client)

# report['resources_by_owner'] is iterated as an owner -> resource count mapping.
print("\nPost-crisis ownership distribution:")
for owner, count in report['resources_by_owner'].items():
    print(f"  - {owner}: {count} resources")