In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

# Configure the global plotting appearance for every figure created below.
# The matplotlib style sheet is applied first and the seaborn palette second;
# keep this order, since applying a style sheet afterwards could replace the
# colour cycle that set_palette installs.
# NOTE(review): the 'seaborn-v0_8' style name is only shipped with newer
# matplotlib releases — confirm the environment's version provides it.
plt.style.use('seaborn-v0_8')
sns.set_palette("husl")

# Confirmation message so the reader can see that the setup cell has run.
print("Urban AI Data Analysis Setup Complete!")


In [None]:
# Generate a reproducible synthetic table of urban districts
np.random.seed(42)
n_districts = 1000

# The keyword order below fixes the order of the random draws, and therefore
# the exact values produced from the seeded global NumPy RNG.
urban_data = dict(
    district_type=np.random.choice(['Residential', 'Commercial', 'Industrial', 'Mixed'], n_districts),
    population_density=np.random.normal(5000, 2000, n_districts),
    green_space_ratio=np.random.exponential(0.2, n_districts),
    traffic_volume=np.random.normal(1500, 500, n_districts),
    air_quality_index=np.random.normal(75, 25, n_districts),
    date_established=pd.date_range('1950-01-01', periods=n_districts, freq='30D'),
)

# One row per simulated district
df = pd.DataFrame(urban_data)

# Clamp every simulated metric into its allowed (lower, upper) range, because
# the unbounded distributions above can produce out-of-range values
# (e.g. a negative density or a green-space ratio above 1).
metric_bounds = {
    'population_density': (100, 15000),
    'green_space_ratio': (0, 1),
    'traffic_volume': (0, 5000),
    'air_quality_index': (0, 500),
}
for metric, (lowest, highest) in metric_bounds.items():
    df[metric] = df[metric].clip(lower=lowest, upper=highest)

print(f"Urban dataset created with {len(df)} districts")
df.head()


In [None]:
# Summarise the urban dataset: dimensions, district-type balance and
# per-column descriptive statistics.
district_counts = df['district_type'].value_counts()

print("=== Urban Dataset Statistics ===")
print(f"Dataset shape: {df.shape}")
print("\nDistrict types distribution:")
print(district_counts)
print("\nBasic statistics:")
# Left as the final expression so the notebook renders the summary as a table
df.describe()


In [None]:
# Create comprehensive urban analysis visualizations: a 2x2 grid of panels
# built from the urban `df` (district mix, population density, green space
# vs air quality, and traffic vs population density).
fig, axes = plt.subplots(2, 2, figsize=(15, 12))
# Name each panel once so the code below reads by purpose, not by grid index.
(type_ax, density_ax), (green_ax, traffic_ax) = axes

# NOTE(review): the titles below contain emoji; confirm the active matplotlib
# font provides these glyphs, otherwise they may render as placeholder boxes.

# 1. District type distribution
df['district_type'].value_counts().plot(kind='bar', ax=type_ax, color='skyblue')
type_ax.set_title('🏙️ District Type Distribution')
type_ax.set_ylabel('Number of Districts')
type_ax.tick_params(axis='x', rotation=45)

# 2. Population density distribution
density_ax.hist(df['population_density'], bins=30, alpha=0.7, color='lightgreen')
density_ax.set_title('👥 Population Density Distribution')
density_ax.set_xlabel('Population Density (per km²)')
density_ax.set_ylabel('Frequency')

# 3. Green space vs Air quality by district type
# One scatter call per district type so each type gets its own colour and
# legend entry; types are plotted in order of first appearance in the data.
for district in df['district_type'].unique():
    district_data = df[df['district_type'] == district]
    green_ax.scatter(district_data['green_space_ratio'], district_data['air_quality_index'],
                     alpha=0.6, label=district, s=30)
green_ax.set_xlabel('Green Space Ratio')
green_ax.set_ylabel('Air Quality Index')
green_ax.set_title('🌿 Green Space vs Air Quality by District Type')
green_ax.legend()

# 4. Traffic volume vs Population density
# Keep the scatter's return value: it is the mappable the colorbar needs, and
# holding it avoids depending on the position of the artist in ax.collections.
traffic_scatter = traffic_ax.scatter(df['population_density'], df['traffic_volume'],
                                     alpha=0.5, c=df['air_quality_index'], cmap='RdYlBu_r', s=20)
traffic_ax.set_xlabel('Population Density')
traffic_ax.set_ylabel('Traffic Volume')
traffic_ax.set_title('🚗 Traffic vs Population (colored by Air Quality)')
# Attach the colorbar through the figure explicitly rather than via pyplot's
# implicit "current figure" state.
fig.colorbar(traffic_scatter, ax=traffic_ax, label='Air Quality Index')

plt.tight_layout()
plt.show()


# 📈 Data Analysis Example

This notebook demonstrates basic data analysis techniques — creating a dataset, computing summary statistics, and plotting — using pandas, NumPy, matplotlib, and seaborn.


In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

# Set style for better looking plots.
# These are the same two calls made in the notebook's first code cell, repeated
# here so this section also works when run on its own. The style sheet is
# applied before the seaborn palette; keep this order, since applying a style
# sheet afterwards could replace the colour cycle that set_palette installs.
plt.style.use('seaborn-v0_8')
sns.set_palette("husl")


## Creating Sample Dataset

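Let's create a synthetic sample dataset of 1,000 rows for analysis. Note that the cell below reassigns `df`, replacing the urban-districts DataFrame built earlier in this notebook; re-run the earlier cells from the top before using the urban analysis again.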


In [None]:
# Build a reproducible sample dataset
np.random.seed(42)
n_samples = 1000

# Draw the columns one at a time. The order (category, value1, value2) must
# stay fixed because each call consumes the seeded global RNG stream.
category_labels = np.random.choice(['A', 'B', 'C', 'D'], n_samples)
normal_values = np.random.normal(50, 15, n_samples)
exponential_values = np.random.exponential(2, n_samples)
# One consecutive calendar day per sample
daily_dates = pd.date_range('2023-01-01', periods=n_samples, freq='D')

data = {
    'category': category_labels,
    'value1': normal_values,
    'value2': exponential_values,
    'date': daily_dates,
}

df = pd.DataFrame(data)
# Preview the first rows as the cell's rendered output
df.head()


## Basic Statistics


In [None]:
# Report the dataset's dimensions, then its per-column summary statistics
row_col_counts = df.shape
print("Dataset shape:", row_col_counts)
print("\nBasic statistics:")
# Left as the final expression so the notebook renders the summary as a table
df.describe()


## Visualizations


In [None]:
# Create a 2x2 grid of panels summarising the sample `df`: category counts,
# the value1 distribution, value1 per category, and value1 against value2.
fig, axes = plt.subplots(2, 2, figsize=(12, 10))
# Name each panel once so the code below reads by purpose, not by grid index.
(category_ax, hist_ax), (box_ax, scatter_ax) = axes

# Distribution of categories
df['category'].value_counts().plot(kind='bar', ax=category_ax)
category_ax.set_title('Category Distribution')
category_ax.set_ylabel('Count')

# Histogram of value1
hist_ax.hist(df['value1'], bins=30, alpha=0.7)
hist_ax.set_title('Distribution of Value1')
hist_ax.set_xlabel('Value1')
hist_ax.set_ylabel('Frequency')

# Box plot by category
df.boxplot(column='value1', by='category', ax=box_ax)
box_ax.set_title('Value1 by Category')
box_ax.set_ylabel('Value1')
# A grouped pandas boxplot also writes a figure-level "Boxplot grouped by ..."
# suptitle. In a multi-panel figure that heading sits above the whole grid
# and reads as the title of all four plots, so clear it.
fig.suptitle('')

# Scatter plot
scatter_ax.scatter(df['value1'], df['value2'], alpha=0.5)
scatter_ax.set_xlabel('Value1')
scatter_ax.set_ylabel('Value2')
scatter_ax.set_title('Value1 vs Value2')

plt.tight_layout()
plt.show()


## Conclusion

This analysis showed:
- Basic data exploration techniques
- Statistical summaries
- Various visualization methods

You can use this as a template for your own data analysis notebooks!
