In [1]:
import pandas as pd
from geopy.geocoders import Nominatim
from geopy.extra.rate_limiter import RateLimiter
import plotly.express as px
import plotly.io as pio
pio.renderers.default = "notebook"

In [2]:
# Read the student marks dataset from the CSV file in the working directory,
# then show the resulting frame as the cell output.
df = pd.read_csv(filepath_or_buffer="data_science_student_marks.csv")
df

Unnamed: 0,student_id,location,age,sql_marks,excel_marks,python_marks,power_bi_marks,english_marks
0,4,Sydney,24,95,99,87,82,75
1,5,Tokyo,24,99,95,89,86,82
2,6,Berlin,22,72,70,99,79,77
3,7,London,23,97,90,74,72,85
4,8,Tokyo,22,91,71,79,80,75
...,...,...,...,...,...,...,...,...
492,496,New York,23,71,92,98,78,88
493,497,Los Angeles,20,90,100,90,78,73
494,498,Paris,22,78,80,78,71,70
495,499,Los Angeles,21,96,93,78,78,82


In [3]:
# `df` is already a DataFrame (it is the return value of pd.read_csv), so
# re-wrapping it in pd.DataFrame(...) is redundant. Just display it.
df

Unnamed: 0,student_id,location,age,sql_marks,excel_marks,python_marks,power_bi_marks,english_marks
0,4,Sydney,24,95,99,87,82,75
1,5,Tokyo,24,99,95,89,86,82
2,6,Berlin,22,72,70,99,79,77
3,7,London,23,97,90,74,72,85
4,8,Tokyo,22,91,71,79,80,75
...,...,...,...,...,...,...,...,...
492,496,New York,23,71,92,98,78,88
493,497,Los Angeles,20,90,100,90,78,73
494,498,Paris,22,78,80,78,71,70
495,499,Los Angeles,21,96,93,78,78,82


In [5]:
import pandas as pd
from geopy.geocoders import Nominatim
from geopy.extra.rate_limiter import RateLimiter
import plotly.express as px
import plotly.io as pio

# Per-subject mark columns; defined once and reused for aggregation,
# averaging, rounding and the hover tooltip.
MARK_COLUMNS = [
    "sql_marks",
    "excel_marks",
    "python_marks",
    "power_bi_marks",
    "english_marks",
]

# Group by city and calculate the average mark per subject.
df_city = df.groupby("location", as_index=False)[MARK_COLUMNS].mean()

# Calculate the overall average of all subject averages for each city.
# This is done before rounding so the overall figure is not built from
# already-rounded values.
df_city["avg_marks"] = df_city[MARK_COLUMNS].mean(axis=1)

# Round all numeric columns to 2 decimal places.
df_city = df_city.round(dict.fromkeys([*MARK_COLUMNS, "avg_marks"], 2))

# Get latitude and longitude for each city.
# Nominatim's public service allows at most 1 request per second, so the
# rate limiter is configured with a 1-second minimum delay between calls.
# NOTE(review): Nominatim also asks for a user agent that identifies the
# application; consider replacing the generic "geoapi" value.
geolocator = Nominatim(user_agent="geoapi")
geocode = RateLimiter(geolocator.geocode, min_delay_seconds=1)

# Keep the raw geocoding results in a local Series instead of a temporary
# DataFrame column, so nothing has to be dropped afterwards.
geo_results = df_city["location"].apply(geocode)
df_city["lat"] = geo_results.apply(lambda loc: loc.latitude if loc else None)
df_city["lon"] = geo_results.apply(lambda loc: loc.longitude if loc else None)

# Surface geocoding failures instead of leaving them as silent gaps.
cities_without_coords = df_city.loc[df_city["lat"].isna(), "location"].tolist()
if cities_without_coords:
    print(
        f"Warning: could not geocode {cities_without_coords}; "
        "these cities have no coordinates to plot."
    )

# Create the bubble map.
# This overrides the "notebook" renderer selected in the first cell.
pio.renderers.default = "notebook_connected"  # for Jupyter

fig = px.scatter_geo(
    df_city,
    lat="lat",
    lon="lon",
    color="avg_marks",                # color = average of all marks
    hover_name="location",
    hover_data={
        # Show every average in the tooltip, hide the raw coordinates.
        **dict.fromkeys(["avg_marks", *MARK_COLUMNS], True),
        "lat": False,
        "lon": False,
    },
    title="Average Student Performance by City",
    size=[15] * len(df_city),         # same size value for every city
    projection="natural earth",
    color_continuous_scale="Viridis"  # color gradient (purple → blue → green → yellow)
)

fig.show()
