In [1]:
import folium
import pandas as pd
import geopandas as gpd
import geoviews as gv
from bokeh.io import output_notebook
from bokeh.plotting import show

In [2]:
GAME_DATA_PATH = "vgsales_clean.csv"
MAP_CENTER = [0, 0]
MAP_ZOOM = 2
EU_LAT = 54.5260
EU_LON = 15.2551
JP_LAT = 36.2048
JP_LON = 138.2529
NA_LAT = 39.30
NA_LON = -94.71
EU_COLOR = 'yellow'
JP_COLOR = 'red'
NA_COLOR = 'blue'
LOC_DATA = {
    'EU_Sales': {'lat': 54.53, 'lon': 15.26, 'color': 'yellow',},
    'JP_Sales': {'lat': 36.20, 'lon': 138.25, 'color': 'red',},
    'NA_Sales': {'lat': 39.30, 'lon': -94.71, 'color': 'blue',},
}

In [3]:
df = pd.read_csv(GAME_DATA_PATH )
print(df.shape)
df.head()

(16291, 11)


Unnamed: 0,Rank,Name,Platform,Year,Genre,Publisher,NA_Sales,EU_Sales,JP_Sales,Other_Sales,Global_Sales
0,1,Wii Sports,Wii,2006.0,Sports,Nintendo,41.49,29.02,3.77,8.46,82.74
1,2,Super Mario Bros.,NES,1985.0,Platform,Nintendo,29.08,3.58,6.81,0.77,40.24
2,3,Mario Kart Wii,Wii,2008.0,Racing,Nintendo,15.85,12.88,3.79,3.31,35.82
3,4,Wii Sports Resort,Wii,2009.0,Sports,Nintendo,15.75,11.01,3.28,2.96,33.0
4,5,Pokemon Red/Pokemon Blue,GB,1996.0,Role-Playing,Nintendo,11.27,8.89,10.22,1.0,31.37


In [4]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 16291 entries, 0 to 16290
Data columns (total 11 columns):
 #   Column        Non-Null Count  Dtype  
---  ------        --------------  -----  
 0   Rank          16291 non-null  int64  
 1   Name          16291 non-null  object 
 2   Platform      16291 non-null  object 
 3   Year          16291 non-null  float64
 4   Genre         16291 non-null  object 
 5   Publisher     16291 non-null  object 
 6   NA_Sales      16291 non-null  float64
 7   EU_Sales      16291 non-null  float64
 8   JP_Sales      16291 non-null  float64
 9   Other_Sales   16291 non-null  float64
 10  Global_Sales  16291 non-null  float64
dtypes: float64(6), int64(1), object(4)
memory usage: 1.4+ MB


In [5]:
df_melted = df.melt(id_vars=['Rank','Genre'], value_vars=['NA_Sales', 'EU_Sales', 'JP_Sales'], var_name='region', value_name='sales')
print(df_melted.shape)
df_melted.head(3)

(48873, 4)


Unnamed: 0,Rank,Genre,region,sales
0,1,Sports,NA_Sales,41.49
1,2,Platform,NA_Sales,29.08
2,3,Racing,NA_Sales,15.85


In [9]:
total_genre_sales = df_melted.groupby(['Genre','region']).sum().reset_index()
for region, vals in LOC_DATA.items():
    mask_s = total_genre_sales.region == region
    total_genre_sales.loc[mask_s, 'lat'] = vals['lat']
    total_genre_sales.loc[mask_s, 'lon'] = vals['lon']
    total_genre_sales.loc[mask_s, 'color'] = vals['color']
print(total_genre_sales.shape)
total_genre_sales.head()

(36, 7)


Unnamed: 0,Genre,region,Rank,sales,lat,lon,color
0,Action,EU_Sales,25955792,516.48,54.53,15.26,yellow
1,Action,JP_Sales,25955792,158.65,36.2,138.25,red
2,Action,NA_Sales,25955792,861.77,39.3,-94.71,blue
3,Adventure,EU_Sales,14704318,63.74,54.53,15.26,yellow
4,Adventure,JP_Sales,14704318,51.99,36.2,138.25,red


In [10]:
action_df = total_genre_sales.loc[total_genre_sales['Genre']=='Action'].copy()
print(action_df.shape)
action_df.head(3)

(3, 7)


Unnamed: 0,Genre,region,Rank,sales,lat,lon,color
0,Action,EU_Sales,25955792,516.48,54.53,15.26,yellow
1,Action,JP_Sales,25955792,158.65,36.2,138.25,red
2,Action,NA_Sales,25955792,861.77,39.3,-94.71,blue


In [11]:
# Create a map centered around the average latitude and longitude
#map_center = [action_df['lat'].mean(), action_df['lon'].mean()]
my_map=folium.Map(location=MAP_CENTER, zoom_start=MAP_ZOOM)

# Loop through the DataFrame and add CircleMarkers to the map
for idx, row in action_df.iterrows():
    folium.CircleMarker(
        location=[row['lat'], row['lon']],  # Set the location based on lat and lon
        radius=row['sales'] / 10,  # Size is based on the 'size' column, divided to scale it properly
        color=row['color'],  # Marker color
        fill=True,
        fill_color=row['color'],  # Fill color
        fill_opacity=0.6,
        # popup=f"Size: {row['size']}",  # Add popup with size info
    ).add_to(my_map)

my_map

In [None]:
row


In [None]:
action_df

In [None]:
kids = ['Bob', 'Doug', 'Pam']
for one_kid in kids:
    print(kids)

In [None]:
[action_df['lat']], [action_df['lon']]

In [None]:
sports_df = total_genre_sales.loc[total_genre_sales['Genre']=='Sports'].copy()
sports_df['lat']= [54.5260, 36.2048, 54.5260]
sports_df['lon']= [15.2551, 138.2529, 138.2529]
print(sports_df)

In [None]:
shooter_df = total_genre_sales.loc[total_genre_sales['Genre']=='Shooter'].copy()
shooter_df['lat']= [54.5260, 36.2048, 54.5260]
shooter_df['lon']= [15.2551, 138.2529, 138.2529]
print(shooter_df)

In [None]:
#Which country has the highest percentage of gamers contributing to the top 3 genres?
#Top 3 Genres = Action, Shooter, Sports
#Maps
#NA= 54.5260° N, 105.2551° W
#EU= 54.5260° N, 15.2551° E
#JP= 36.2048° N, 138.2529° E
