In [2]:
import pandas as pd
from sqlalchemy import create_engine


In [3]:
# Relative path to the raw video-game sales CSV export.
sales_file = 'Resources/Video_Games_Sales_as_at_22_Dec_2016.csv'

# Read the full CSV into a DataFrame, then preview the first five rows.
sales_df = pd.read_csv(filepath_or_buffer=sales_file)
sales_df.head(n=5)

Unnamed: 0,Name,Platform,Year_of_Release,Genre,Publisher,NA_Sales,EU_Sales,JP_Sales,Other_Sales,Global_Sales,Critic_Score,Critic_Count,User_Score,User_Count,Developer,Rating
0,Wii Sports,Wii,2006.0,Sports,Nintendo,41.36,28.96,3.77,8.45,82.53,76.0,51.0,8.0,322.0,Nintendo,E
1,Super Mario Bros.,NES,1985.0,Platform,Nintendo,29.08,3.58,6.81,0.77,40.24,,,,,,
2,Mario Kart Wii,Wii,2008.0,Racing,Nintendo,15.68,12.76,3.79,3.29,35.52,82.0,73.0,8.3,709.0,Nintendo,E
3,Wii Sports Resort,Wii,2009.0,Sports,Nintendo,15.61,10.93,3.28,2.95,32.77,80.0,73.0,8.0,192.0,Nintendo,E
4,Pokemon Red/Pokemon Blue,GB,1996.0,Role-Playing,Nintendo,11.27,8.89,10.22,1.0,31.37,,,,,,


In [24]:
# Keep 6 of the 16 columns from the original dataset.
sales_condensed = sales_df[
    ["Publisher", "Name", "Year_of_Release", "Global_Sales", "Critic_Score", "User_Score"]
]

# Row filters: Nintendo-published titles released in 2010-2016 (inclusive).
is_nintendo = sales_condensed["Publisher"] == "Nintendo"
in_year_window = (sales_condensed["Year_of_Release"] > 2009) & (
    sales_condensed["Year_of_Release"] < 2017
)

# Build the filtered frame in a single chain so that:
#   * the dropna() result is actually kept (previously it was computed and
#     discarded, so the "drop null years" step never took effect on its own);
#   * the year cast goes through DataFrame.astype, which returns a new frame
#     instead of assigning into a slice of `sales_df` (the old column
#     assignment triggered SettingWithCopyWarning).
nintendo_df = (
    sales_condensed.loc[is_nintendo & in_year_window]
    # NaN years already fail the comparisons above; this makes the intent explicit.
    .dropna(subset=["Year_of_Release"])
    # Nullable integer dtype so years display as 2010 rather than 2010.0.
    .astype({"Year_of_Release": "Int32"})
)
nintendo_df.dtypes

Publisher           object
Name                object
Year_of_Release      Int32
Global_Sales       float64
Critic_Score       float64
User_Score         float64
dtype: object

In [25]:
# Bucket the Nintendo titles by release year; the per-year metrics are
# computed from this GroupBy object.
nintendo_grouped = nintendo_df.groupby(by=["Year_of_Release"])

# On a GroupBy, head() returns the first five rows of *each* group
# (in original row order), not the first five rows of the frame.
nintendo_grouped.head(5)

Unnamed: 0,Publisher,Name,Year_of_Release,Global_Sales,Critic_Score,User_Score
27,Nintendo,Pokemon Black/Pokemon White,2010,15.14,,
33,Nintendo,Pokemon X/Pokemon Y,2013,14.6,,
40,Nintendo,Mario Kart 7,2011,12.66,85.0,8.2
47,Nintendo,Pokemon Omega Ruby/Pokemon Alpha Sapphire,2014,11.68,,
53,Nintendo,Super Mario 3D Land,2011,10.81,90.0,8.4
62,Nintendo,New Super Mario Bros. 2,2012,9.9,78.0,7.2
73,Nintendo,Animal Crossing: New Leaf,2012,9.16,88.0,8.7
79,Nintendo,Wii Party,2010,8.38,68.0,7.4
83,Nintendo,Pokemon Black 2/Pokemon White 2,2012,8.07,,
96,Nintendo,Super Smash Bros. for Wii U and 3DS,2014,7.55,,


In [26]:
# Per-year metrics, each kept as a one-column DataFrame indexed by year.

# Games released that year = number of non-null "Name" entries.
total_games = nintendo_grouped[["Name"]].agg("count")

# Total global sales for the year.
total_sales = nintendo_grouped[["Global_Sales"]].agg("sum")

# Mean critic score and mean user score for the year.
avg_critic_score = nintendo_grouped[["Critic_Score"]].agg("mean")
avg_user_score = nintendo_grouped[["User_Score"]].agg("mean")



In [27]:
# Display labels for the per-year metric columns.
column_labels = {
    "Name": "Total Games Released",
    "Global_Sales": "Sales (mil)",
    "Critic_Score": "Average Critic Score (0-100)",
    "User_Score": "Average User Score (0-10)",
}

# Join the four per-year frames side by side (they share the year index),
# then apply the display labels.
nintendo_transformed_df = pd.concat(
    [total_games, total_sales, avg_critic_score, avg_user_score],
    axis=1,
).rename(columns=column_labels)

nintendo_transformed_df

Unnamed: 0_level_0,Total Games Released,Sales (mil),Average Critic Score (0-100),Average User Score (0-10)
Year_of_Release,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2010,28,59.74,74.944444,7.55625
2011,26,51.69,72.4375,7.7375
2012,31,56.11,74.4,7.46
2013,23,53.48,78.166667,7.983333
2014,20,48.88,76.133333,7.82
2015,32,27.61,70.736842,7.489474
2016,14,11.74,74.5,7.3125


In [34]:
# Promote the Year_of_Release index to a regular column.
# Guarded so the cell is safe to re-run: an unconditional reset_index() on an
# already-reset frame would insert a spurious "index" column each time.
if "Year_of_Release" not in nintendo_transformed_df.columns:
    nintendo_transformed_df = nintendo_transformed_df.reset_index()


In [35]:

# Show the dtype of each column of the transformed frame.
nintendo_transformed_df.dtypes

Year_of_Release                   Int32
Total Games Released              int64
Sales (mil)                     float64
Average Critic Score (0-100)    float64
Average User Score (0-10)       float64
dtype: object