In [1]:
import pandas as pd
# Budget data by department and project
budget_data = pd.DataFrame({
    'department': ['Engineering', 'Engineering', 'Engineering', 'Engineering',
                   'Marketing', 'Marketing', 'Marketing',
                   'Sales', 'Sales', 'Sales',
                   'Research', 'Research', 'Research'],
    'project': ['Cloud Infrastructure', 'Product Development', 'Security', 'Data Pipeline',
                'Digital Campaigns', 'Content Creation', 'Events',
                'CRM System', 'Sales Training', 'Partnerships',
                'AI Research', 'Market Analysis', 'User Studies'],
    'budget': [2500, 3500, 1800, 2200,
               1500, 800, 1200,
               1000, 600, 900,
               2000, 1200, 800]
})

budget_data['category'] = 'Company Budget'

print(f"Total Budget: ${budget_data['budget'].sum():,}K")
print(f"Departments: {budget_data['department'].nunique()}")
print(f"Projects: {len(budget_data)}")

# Display budget by department
print("\nBudget by Department:")
dept_budget = budget_data.groupby('department')['budget'].agg(['sum', 'count', 'mean']).round(0)
dept_budget.columns = ['Total ($K)', 'Projects', 'Avg per Project ($K)']
display(dept_budget)

Total Budget: $20,000K
Departments: 4
Projects: 13

Budget by Department:


Unnamed: 0_level_0,Total ($K),Projects,Avg per Project ($K)
department,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Engineering,10000,4,2500.0
Marketing,3500,3,1167.0
Research,4000,3,1333.0
Sales,2500,3,833.0


In [2]:
import plotly.express as px
print("Creating Circular Treemap...")

fig = px.treemap(budget_data,
                 path=['category', 'department', 'project'],
                 values='budget',
                 color='budget',
                 color_continuous_scale='Plasma',
                 title='Company Budget Distribution<br>by Department and Project (in $K)')

fig.update_traces(textinfo="label+value+percent parent", textfont_size=10)
fig.update_layout(height=700, width=1000)
fig.show()

print("Circular treemap completed")
print("\nAnalysis:")
print("- Engineering has the largest budget (36%)")
print("- Product Development is the biggest project ($3,500K)")
print("- Research accounts for 20% of the total budget")

Creating Circular Treemap...


Circular treemap completed

Analysis:
- Engineering has the largest budget (36%)
- Product Development is the biggest project ($3,500K)
- Research accounts for 20% of the total budget


In [3]:
dept_budget = budget_data.groupby('department')['budget'].agg(['sum', 'count', 'mean']).reset_index()

fig_eff = px.scatter(
    dept_budget,
    x='sum',
    y='mean',
    size='count',
    color='department',
    text='department',
    color_discrete_sequence=px.colors.qualitative.Set2,
    title='Department Budget Efficiency: Total vs. Average per Project'
)

fig_eff.update_traces(textposition='top center')
fig_eff.update_layout(
    xaxis_title="Total Department Budget ($K)",
    yaxis_title="Average Budget per Project ($K)",
    height=600,
    width=900
)

fig_eff.show()

### Insights
This chart helps visualize which departments achieve greater efficiency (more projects with a lower average investment) and which ones concentrate large amounts in just a few projects. It's ideal for discussions about budgetary readjustment or strategic prioritization.

# Additional Visualizations

In [4]:
import pandas as pd

# Load data
df = pd.read_csv("Demographic.csv")

# Filter only the relevant rows
df = df[df["Series Name"].isin(["Population, total", "GDP per capita (current US$)"])]

# Restructure the data (melt)
df_clean = df.melt(
    id_vars=["Country Name", "Country Code", "Series Name"],
    value_vars=['2020 [YR2020]', '2021 [YR2021]', '2022 [YR2022]', '2023 [YR2023]', '2024 [YR2024]'],
    var_name="Year",
    value_name="Value"
)

# Clean up column names and years
df_clean["Year"] = df_clean["Year"].str.extract(r"(\d{4})").astype(int)

# 🔧 CRITICAL CLEANUP HERE
# Replace non-numeric values and convert
df_clean["Value"] = (
    df_clean["Value"]
    .astype(str)
    .str.replace(",", "", regex=False)  # remove commas if present
    .replace({"..": None, "": None})   # replace ".." with NaN
)

# Convert to float (non-convertible values automatically become NaN)
df_clean["Value"] = pd.to_numeric(df_clean["Value"], errors="coerce")

# Pivot to have separate columns
df_final = df_clean.pivot_table(
    index=["Country Name", "Country Code", "Year"],
    columns="Series Name",
    values="Value"
).reset_index()

# Rename columns
df_final.columns = ["Country", "Code", "Year", "GDP_per_capita", "Population"]

# Drop rows with missing data
df_final = df_final.dropna(subset=["GDP_per_capita", "Population"])

# Filter only the most recent year
latest_year = df_final["Year"].max()
data_2024 = df_final[df_final["Year"] == latest_year]

print(f"Clean data ({data_2024.shape[0]} countries, year {latest_year})")
display(data_2024.head())


Clean data (231 countries, year 2024)


Unnamed: 0,Country,Code,Year,GDP_per_capita,Population
9,Africa Eastern and Southern,AFE,2024,1567.635839,769294618.0
14,Africa Western and Central,AFW,2024,1284.154441,521764076.0
19,Albania,ALB,2024,10011.627986,2714617.0
24,Algeria,DZA,2024,5631.179991,46814308.0
34,Andorra,AND,2024,49303.67339,81938.0


In [5]:
import plotly.express as px

fig1 = px.scatter_geo(
    data_2024,
    locations="Code",
    hover_name="Country",
    size="Population",
    color="GDP_per_capita",
    color_continuous_scale="Viridis",
    projection="natural earth",
    title=f"Market Viability Map ({latest_year})<br><sup>Bubble = Population | Color = GDP per capita</sup>"
)

fig1.update_layout(
    height=700,
    width=1100,
    geo=dict(showframe=False, showcoastlines=True),
    coloraxis_colorbar_title="GDP per Capita (US$)"
)

fig1.show()

### Insights
This map allows you to visualize where economies with high purchasing power (more intense color) and a large consumer base (big bubbles) converge. This is ideal for identifying markets with a balanced potential between purchasing power and market size.

Marco can use this to decide which countries offer an attractive balance between market size and economic capacity.

In [8]:
import pycountry_convert as pc

# Assign region to each country
def get_region(country):
    try:
        country_code = pc.country_name_to_country_alpha2(country)
        continent_code = pc.country_alpha2_to_continent_code(country_code)
        continent_name = pc.convert_continent_code_to_continent_name(continent_code)
        return continent_name
    except:
        return "Other"

data_2024["Region"] = data_2024["Country"].apply(get_region)

# Create Hierarchical Treemap
fig2 = px.treemap(
    data_2024,
    path=["Region", "Country"],
    values="Population",
    color="GDP_per_capita",
    color_continuous_scale="Plasma",
    title="Hierarchical View of Market Capacity<br><sup>Region → Country | Color = GDP per capita | Size = Population</sup>"
)

fig2.update_traces(textinfo="label+value+percent parent")
fig2.update_layout(height=700, width=1000)
fig2.show()



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



### Insights
This Treemap offers a hierarchical view of global markets by continent. Larger countries represent bigger populations, and the color indicates the average economic level (GDP per capita). It allows for a visual assessment of which regions combine consumer volume and purchasing power. This would be key for deciding where to expand operations or locate new distribution centers.