In [1]:
import numpy as np 
import pandas as pd 
import matplotlib.pyplot as plt
import seaborn as sns
import warnings
warnings.filterwarnings('ignore')

from sklearn.preprocessing import OneHotEncoder, StandardScaler
from sklearn.metrics import  mean_squared_error, mean_absolute_error
from sklearn.model_selection import train_test_split, cross_val_score, GridSearchCV
from sklearn.tree import DecisionTreeRegressor
from sklearn.linear_model import LinearRegression
from sklearn.linear_model import Ridge, Lasso
from sklearn.neighbors import KNeighborsRegressor
from sklearn.ensemble import RandomForestRegressor
from sklearn.ensemble import AdaBoostRegressor, GradientBoostingRegressor
from sklearn.decomposition import PCA
from keras import models
from keras import layers
from keras import optimizers
from keras.layers import Dropout
from keras import regularizers
from keras.callbacks import EarlyStopping

In [2]:
smdb = pd.read_csv('../Data/vgsales-12-4-2019-short.csv')
sglobal = pd.read_csv('../Data/vgsalesGlobale2.csv')
pd.set_option('display.max_columns', None)

In [3]:
sglobal_clean = sglobal.dropna()

In [4]:
sglobal_clean = sglobal_clean.set_index("Rank") 

In [5]:
sglobal_clean.head()

Unnamed: 0_level_0,Name,Platform,Year,Genre,Publisher,NA_Sales,EU_Sales,JP_Sales,Other_Sales,Global_Sales
Rank,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
1,Wii Sports,Wii,2006.0,Sports,Nintendo,41.49,29.02,3.77,8.46,82.74
2,Super Mario Bros.,NES,1985.0,Platform,Nintendo,29.08,3.58,6.81,0.77,40.24
3,Mario Kart Wii,Wii,2008.0,Racing,Nintendo,15.85,12.88,3.79,3.31,35.82
4,Wii Sports Resort,Wii,2009.0,Sports,Nintendo,15.75,11.01,3.28,2.96,33.0
5,Pokemon Red/Pokemon Blue,GB,1996.0,Role-Playing,Nintendo,11.27,8.89,10.22,1.0,31.37


In [6]:
remake = np.array(sglobal_clean['Name'].duplicated())
remake_df = pd.DataFrame(remake, columns=['Remake'], index=sglobal_clean.index)
remake_df

Unnamed: 0_level_0,Remake
Rank,Unnamed: 1_level_1
1,False
2,False
3,False
4,False
5,False
...,...
16596,False
16597,True
16598,True
16599,False


In [8]:
sglobal_clean = pd.concat((sglobal_clean,remake_df), axis=1)
sglobal_clean.head()

Unnamed: 0_level_0,Name,Platform,Year,Genre,Publisher,NA_Sales,EU_Sales,JP_Sales,Other_Sales,Global_Sales,Remake,Remake
Rank,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
1,Wii Sports,Wii,2006.0,Sports,Nintendo,41.49,29.02,3.77,8.46,82.74,False,False
2,Super Mario Bros.,NES,1985.0,Platform,Nintendo,29.08,3.58,6.81,0.77,40.24,False,False
3,Mario Kart Wii,Wii,2008.0,Racing,Nintendo,15.85,12.88,3.79,3.31,35.82,False,False
4,Wii Sports Resort,Wii,2009.0,Sports,Nintendo,15.75,11.01,3.28,2.96,33.0,False,False
5,Pokemon Red/Pokemon Blue,GB,1996.0,Role-Playing,Nintendo,11.27,8.89,10.22,1.0,31.37,False,False


In [9]:
sglobal_clean['Remake'] = sglobal_clean['Remake'].astype(int)
sglobal_clean.head()

Unnamed: 0_level_0,Name,Platform,Year,Genre,Publisher,NA_Sales,EU_Sales,JP_Sales,Other_Sales,Global_Sales,Remake,Remake
Rank,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
1,Wii Sports,Wii,2006.0,Sports,Nintendo,41.49,29.02,3.77,8.46,82.74,0,0
2,Super Mario Bros.,NES,1985.0,Platform,Nintendo,29.08,3.58,6.81,0.77,40.24,0,0
3,Mario Kart Wii,Wii,2008.0,Racing,Nintendo,15.85,12.88,3.79,3.31,35.82,0,0
4,Wii Sports Resort,Wii,2009.0,Sports,Nintendo,15.75,11.01,3.28,2.96,33.0,0,0
5,Pokemon Red/Pokemon Blue,GB,1996.0,Role-Playing,Nintendo,11.27,8.89,10.22,1.0,31.37,0,0


In [10]:
sglobal_smdf = sglobal_clean.merge(smdb, on= 'Rank', how="inner", 
                                   left_index=True, 
                                   suffixes=(None, "_DROP")).filter(regex='^(?!.*_DROP)')
sglobal_smdf.head()

Unnamed: 0,Rank,Name,Platform,Year,Genre,Publisher,NA_Sales,EU_Sales,JP_Sales,Other_Sales,Global_Sales,Remake,Remake.1,ESRB_Rating,Developer,Critic_Score,User_Score,Total_Shipped,PAL_Sales
0,1,Wii Sports,Wii,2006.0,Sports,Nintendo,41.49,29.02,3.77,8.46,82.74,0,0,E,Nintendo EAD,7.7,,82.86,
1,2,Super Mario Bros.,NES,1985.0,Platform,Nintendo,29.08,3.58,6.81,0.77,40.24,0,0,,Nintendo EAD,10.0,,40.24,
2,3,Mario Kart Wii,Wii,2008.0,Racing,Nintendo,15.85,12.88,3.79,3.31,35.82,0,0,E,Nintendo EAD,8.2,9.1,37.14,
3,4,Wii Sports Resort,Wii,2009.0,Sports,Nintendo,15.75,11.01,3.28,2.96,33.0,0,0,,PUBG Corporation,,,36.6,
4,5,Pokemon Red/Pokemon Blue,GB,1996.0,Role-Playing,Nintendo,11.27,8.89,10.22,1.0,31.37,0,0,E,Nintendo EAD,8.0,8.8,33.09,


In [11]:
sglobal_smdf.drop(columns=['Developer', 'Critic_Score', 'User_Score', 
                           'Total_Shipped', 'PAL_Sales','NA_Sales', 
                           'EU_Sales', 'JP_Sales', 'Other_Sales', 'Name'], axis='columns', inplace=True)

In [12]:
sglobal_smdf.head()

Unnamed: 0,Rank,Platform,Year,Genre,Publisher,Global_Sales,Remake,Remake.1,ESRB_Rating
0,1,Wii,2006.0,Sports,Nintendo,82.74,0,0,E
1,2,NES,1985.0,Platform,Nintendo,40.24,0,0,
2,3,Wii,2008.0,Racing,Nintendo,35.82,0,0,E
3,4,Wii,2009.0,Sports,Nintendo,33.0,0,0,
4,5,GB,1996.0,Role-Playing,Nintendo,31.37,0,0,E


In [13]:
sglobal_smdf.dropna(inplace=True)

In [14]:
sglobal_smdf.set_index('Rank')

Unnamed: 0_level_0,Platform,Year,Genre,Publisher,Global_Sales,Remake,Remake,ESRB_Rating
Rank,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
1,Wii,2006.0,Sports,Nintendo,82.74,0,0,E
3,Wii,2008.0,Racing,Nintendo,35.82,0,0,E
5,GB,1996.0,Role-Playing,Nintendo,31.37,0,0,E
6,GB,1989.0,Puzzle,Nintendo,30.26,0,0,E
7,DS,2006.0,Platform,Nintendo,30.01,0,0,E
...,...,...,...,...,...,...,...,...
16589,DS,2009.0,Adventure,Deep Silver,0.01,1,1,E
16595,DS,2008.0,Simulation,Destineer,0.01,0,0,T
16596,GBA,2002.0,Platform,Kemco,0.01,0,0,T
16598,PS2,2008.0,Racing,Activision,0.01,1,1,E10


In [15]:
y = sglobal_smdf['Global_Sales']
X = sglobal_smdf.drop('Global_Sales', axis=1)

In [16]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=.20, random_state=42)

In [17]:
X_train.set_index('Rank')
X_test.set_index('Rank')

Unnamed: 0_level_0,Platform,Year,Genre,Publisher,Remake,Remake,ESRB_Rating
Rank,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
4080,Wii,2007.0,Action,Electronic Arts,1,1,E10
10305,DS,2009.0,Adventure,Nintendo,0,0,E
15384,PS2,2006.0,Platform,Natsume,0,0,M
3140,PSP,2007.0,Action,Vivendi Games,0,0,T
8477,X360,2009.0,Adventure,Codemasters,0,0,E
...,...,...,...,...,...,...,...
15312,PC,2009.0,Strategy,CDV Software Entertainment,0,0,T
8453,PC,2012.0,Simulation,Electronic Arts,0,0,E
11641,DS,2005.0,Misc,505 Games,0,0,E
613,PS3,2013.0,Action,Square Enix,0,0,M


In [18]:
ohe = OneHotEncoder(handle_unknown='ignore')

#OHE X_train
ohe.fit(X_train[['Platform', 'Genre', 'Publisher', 'ESRB_Rating']])

ohe_cats = pd.DataFrame(ohe.transform(X_train[['Platform', 'Genre', 'Publisher','ESRB_Rating']]).todense(), 
                        columns=ohe.get_feature_names(), index=X_train.index)

#OHE X_test
ohe_cats_test= pd.DataFrame(ohe.transform(X_test[['Platform', 'Genre', 'Publisher', 'ESRB_Rating']]).todense(), 
                        columns=ohe.get_feature_names(), index=X_test.index)

In [19]:
# Dropping one hotted columns
X_test.drop(columns=['Platform', 'Genre', 'Publisher','ESRB_Rating'], axis=1, inplace=True)

X_train.drop(columns=['Platform', 'Genre', 'Publisher', 'ESRB_Rating'], axis=1, inplace=True)

In [20]:
# Adding the columns after one hotting
X_train_ohe = pd.concat([X_train, ohe_cats], axis=1)
X_train_ohe.head()

X_test_ohe = pd.concat([X_test, ohe_cats_test], axis=1)
X_test_ohe.head()

Unnamed: 0,Rank,Year,Remake,Remake.1,x0_2600,x0_3DS,x0_DC,x0_DS,x0_GB,x0_GBA,x0_GC,x0_GEN,x0_GG,x0_N64,x0_NES,x0_NG,x0_PC,x0_PS,x0_PS2,x0_PS3,x0_PS4,x0_PSP,x0_PSV,x0_SAT,x0_SCD,x0_SNES,x0_TG16,x0_WS,x0_Wii,x0_WiiU,x0_X360,x0_XB,x0_XOne,x1_Action,x1_Adventure,x1_Fighting,x1_Misc,x1_Platform,x1_Puzzle,x1_Racing,x1_Role-Playing,x1_Shooter,x1_Simulation,x1_Sports,x1_Strategy,x2_10TACLE Studios,x2_1C Company,x2_20th Century Fox Video Games,x2_2D Boy,x2_3DO,x2_505 Games,x2_5pb,x2_7G//AMES,x2_989 Sports,x2_989 Studios,x2_AQ Interactive,x2_ASC Games,x2_ASCII Entertainment,x2_ASCII Media Works,x2_ASK,x2_Abylight,x2_Acclaim Entertainment,x2_Accolade,x2_Ackkstudios,x2_Acquire,x2_Activision,x2_Activision Blizzard,x2_Activision Value,x2_Adeline Software,x2_Aerosoft,x2_Agatsuma Entertainment,x2_Agetec,x2_Aksys Games,x2_Alawar Entertainment,x2_Alchemist,x2_Alternative Software,x2_Altron,x2_American Softworks,x2_Angel Studios,x2_Answer Software,x2_Aqua Plus,x2_Arc System Works,x2_Arena Entertainment,x2_Aria,x2_Arika,x2_ArtDink,x2_Aruze Corp,x2_Ascaron Entertainment,x2_Ascaron Entertainment GmbH,x2_Asgard,x2_Asmik Ace Entertainment,x2_Asmik Corp,x2_Aspyr,x2_Astragon,x2_Asylum Entertainment,x2_Atari,x2_Atlus,x2_Avalon Interactive,x2_Avanquest,x2_Avanquest Software,x2_Axela,x2_BAM! Entertainment,x2_BMG Interactive Entertainment,x2_BPS,x2_Banpresto,x2_Benesse,x2_Bethesda Softworks,x2_Big Ben Interactive,x2_Big Fish Games,x2_Bigben Interactive,x2_Black Bean Games,x2_Black Label Games,x2_Blast! Entertainment Ltd,x2_Blue Byte,x2_Bohemia Interactive,x2_Bomb,x2_Boost On,x2_Brash Entertainment,x2_Broccoli,x2_CBS Electronics,x2_CCP,x2_CDV Software Entertainment,x2_CPG Products,x2_CTO SpA,x2_Capcom,x2_Cave,x2_ChunSoft,x2_City Interactive,x2_Codemasters,x2_Codemasters Online,x2_CokeM Interactive,x2_Coleco,x2_Comfort,x2_Compile,x2_Compile Heart,x2_Conspiracy Entertainment,x2_Core Design Ltd.,x2_Crave Entertainment,x2_Creative Core,x2_Crystal Dynamics,x2_Culture Brain,x2_Culture Publishers,x2_CyberFront,x2_Cygames,x2_D3Publisher,x2_DHM Interactive,x2_DSI Games,x2_DTP Entertainment,x2_Daito,x2_Data Age,x2_Deep Silver,"x2_Destination Software, Inc",x2_Destineer,x2_Detn8 Games,x2_Devolver Digital,x2_Disney Interactive Studios,x2_DreamCatcher Interactive,x2_DreamWorks Interactive,x2_Dusenberry Martin Racing,x2_EA Games,x2_ESP,x2_Ecole,x2_Edia,x2_Eidos Interactive,x2_Electronic Arts,x2_Electronic Arts Victor,x2_Empire Interactive,x2_Encore,x2_Enix Corporation,x2_Enjoy Gaming ltd.,x2_Enterbrain,x2_Epoch,x2_Ertain,x2_Essential Games,x2_Evolved Games,x2_Excalibur Publishing,x2_Experience Inc.,x2_Extreme Entertainment Group,x2_Falcom Corporation,x2_Fields,x2_Flashpoint Games,x2_Flight-Plan,x2_Focus Home Interactive,x2_Focus Multimedia,x2_Foreign Media Games,x2_Fortyfive,x2_Fox Interactive,x2_From Software,x2_FuRyu,x2_Funbox Media,x2_Funsta,x2_G.Rev,x2_GN Software,x2_GOA,x2_GSP,x2_GT Interactive,x2_Gainax Network Systems,x2_Game Arts,x2_Game Factory,x2_Game Life,x2_GameMill Entertainment,x2_GameTek,x2_Gamebridge,x2_Gamecock,x2_Gameloft,x2_Gathering of Developers,x2_General Entertainment,x2_Genki,x2_Genterprise,x2_Ghostlight,x2_Global A Entertainment,x2_Global Star,x2_Gotham Games,x2_Graffiti,x2_Grand Prix Games,x2_Graphsim Entertainment,x2_Gremlin Interactive Ltd,x2_Griffin International,x2_Groove Games,x2_GungHo,x2_Gust,x2_HAL Laboratory,x2_HMH Interactive,x2_Hackberry,x2_Hamster Corporation,x2_Happinet,x2_Harmonix Music Systems,x2_Hasbro Interactive,x2_Headup Games,x2_Hect,x2_Hello Games,x2_Her Interactive,x2_Hip Interactive,x2_Home Entertainment Suppliers,x2_Hudson Entertainment,x2_Hudson Soft,x2_Human Entertainment,x2_HuneX,x2_IE Institute,x2_Iceberg Interactive,x2_Idea Factory,x2_Idea Factory International,x2_Ignition Entertainment,x2_Illusion Softworks,x2_Imagic,x2_Imagineer,x2_Imax,x2_Indie Games,x2_Infogrames,x2_Insomniac Games,x2_Interchannel,x2_Intergrow,x2_Interplay,x2_Irem Software Engineering,x2_JVC,x2_Jack of All Games,x2_Jaleco,x2_Jester Interactive,x2_JoWood Productions,x2_Jorudan,x2_Just Flight,x2_KID,x2_KSS,x2_Kadokawa Games,x2_Kadokawa Shoten,x2_Kaga Create,x2_Kalypso Media,x2_Kando Games,x2_Karin Entertainment,x2_Kemco,x2_King Records,x2_Knowledge Adventure,x2_Koch Media,x2_Konami Digital Entertainment,x2_LEGO Media,x2_LSP Games,x2_Laguna,x2_Level 5,x2_Lexicon Entertainment,x2_Licensed 4U,x2_Lighthouse Interactive,x2_Liquid Games,x2_Little Orbit,x2_LucasArts,x2_MC2 Entertainment,x2_MLB.com,x2_MTO,x2_MTV Games,x2_Mad Catz,x2_Magix,x2_Majesco Entertainment,x2_Marvel Entertainment,x2_Marvelous Entertainment,x2_Marvelous Interactive,x2_Masque Publishing,x2_Mastertronic,x2_Mastiff,x2_Mattel Interactive,x2_Maximum Family Games,x2_Maxis,x2_Media Entertainment,x2_Media Factory,x2_Media Works,x2_Men-A-Vision,x2_Mentor Interactive,x2_Mercury Games,x2_Merscom LLC,x2_Metro 3D,x2_Michaelsoft,x2_Micro Cabin,x2_Microids,x2_Microprose,x2_Microsoft Game Studios,x2_Midas Interactive Entertainment,x2_Midway Games,x2_Milestone S.r.l,x2_Milestone S.r.l.,x2_Minato Station,x2_Mindscape,x2_Mirai Shounen,x2_Misawa,x2_Mojang,x2_Monte Christo Multimedia,x2_Moss,x2_Mud Duck Productions,x2_Mumbo Jumbo,x2_Myelin Media,x2_Mystique,x2_NCS,x2_NCSoft,x2_NDA Productions,x2_NEC,x2_NEC Interchannel,x2_Namco Bandai Games,x2_Natsume,x2_Neko Entertainment,x2_NetRevo,x2_New,x2_New World Computing,x2_NewKidCo,x2_Nihon Falcom Corporation,x2_Nintendo,x2_Nippon Amuse,x2_Nippon Columbia,x2_Nippon Ichi Software,x2_Nippon Telenet,x2_Nitroplus,x2_Nobilis,x2_Nordcurrent,x2_Nordic Games,x2_NovaLogic,x2_O-Games,x2_O3 Entertainment,x2_Ocean,x2_Office Create,x2_Oxygen Interactive,x2_P2 Games,x2_PM Studios,x2_PQube,x2_Pacific Century Cyber Works,x2_Pack-In-Video,x2_Palcom,x2_Panther Software,x2_Paon,x2_Paradox Development,x2_Paradox Interactive,x2_Parker Bros.,x2_Performance Designed Products,x2_Phantagram,x2_Phantom EFX,x2_Phenomedia,x2_Phoenix Games,x2_Pinnacle,x2_Pioneer LDC,x2_Play It,x2_PlayV,x2_Playlogic Game Factory,x2_Pony Canyon,x2_PopCap Games,x2_PopTop Software,x2_Popcorn Arcade,x2_Princess Soft,x2_Prototype,x2_Psygnosis,x2_Quelle,x2_Quinrose,x2_Quintet,x2_RTL,x2_Rage Software,x2_Rebellion,x2_Rebellion Developments,x2_Red Storm Entertainment,x2_RedOctane,x2_Reef Entertainment,x2_Revolution (Japan),x2_Revolution Software,x2_Rising Star Games,x2_Riverhillsoft,x2_Rocket Company,x2_Rondomedia,x2_Russel,x2_SCS Software,x2_SCi,x2_SNK,x2_SNK Playmore,x2_Sammy Corporation,x2_Scholastic Inc.,x2_Screenlife,x2_Sega,x2_Seta Corporation,x2_Shogakukan,x2_Slightly Mad Studios,x2_Slitherine Software,x2_Societa,x2_Sony Computer Entertainment,x2_Sony Computer Entertainment America,x2_Sony Computer Entertainment Europe,x2_Sony Music Entertainment,x2_Sony Online Entertainment,x2_SouthPeak Games,x2_Spike,x2_Square,x2_Square Enix,x2_SquareSoft,x2_Starfish,x2_Starpath Corp.,x2_Sting,x2_Storm City Games,x2_Success,x2_Sunrise Interactive,x2_Sunsoft,x2_Sweets,x2_Swing! Entertainment,x2_Syscom,x2_System 3,x2_System 3 Arcade Software,x2_System Soft,x2_TDK Core,x2_TDK Mediactive,x2_TGL,x2_THQ,x2_Taito,x2_Takara,x2_Takara Tomy,x2_Take-Two Interactive,x2_TalonSoft,x2_Team17 Software,x2_TechnoSoft,x2_Technos Japan Corporation,x2_Tecmo Koei,x2_Telegames,x2_Telltale Games,x2_Telstar,x2_Tetris Online,x2_The Adventure Company,x2_The Learning Company,x2_Time Warner Interactive,x2_Titus,x2_Tivola,x2_Tommo,x2_Tomy Corporation,x2_TopWare Interactive,x2_Touchstone,x2_Tradewest,x2_Trion Worlds,x2_Tru Blu Entertainment,x2_Tryfirst,x2_U.S. Gold,x2_UEP Systems,x2_UFO Interactive,x2_Ubisoft,x2_Ubisoft Annecy,x2_Universal Gamex,x2_Universal Interactive,x2_Unknown,x2_Valcon Games,x2_ValuSoft,x2_Valve Software,x2_Victor Interactive,x2_Video System,x2_Vir2L Studios,x2_Virgin Interactive,x2_Virtual Play Games,x2_Vivendi Games,x2_Wanadoo,x2_Wargaming.net,x2_Warner Bros. Interactive Entertainment,x2_WayForward Technologies,x2_Westwood Studios,x2_XS Games,x2_Xicat Interactive,x2_Xplosiv,x2_Xseed Games,x2_Yacht Club Games,x2_Yeti,x2_Yuke's,x2_Zenrin,x2_Zoo Digital Publishing,x2_Zoo Games,x2_Zushi Games,x2_bitComposer Games,x2_dramatic create,x2_id Software,x2_inXile Entertainment,"x2_mixi, Inc",x2_responDESIGN,x3_E,x3_E10,x3_EC,x3_M,x3_RP,x3_T
4079,4080,2007.0,1,1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0
10304,10305,2009.0,0,0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0
15383,15384,2006.0,0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0
3139,3140,2007.0,0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
8476,8477,2009.0,0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0


## Running models

### Linear Regression

In [53]:
linreg = LinearRegression()
ss = StandardScaler()

#Scaling the data
X_train_ohe_ss = ss.fit_transform(X_train_ohe)
X_test_ohe_ss = ss.transform(X_test_ohe)

linreg.fit(X_train_ohe_ss, y_train)
print(linreg.score(X_train_ohe, y_train))
print(mean_absolute_error(y_train, linreg.predict(X_train_ohe)))

-8.687211077532722e+24
4276141357356.104


In [54]:
cross_val_score(linreg, X_train_ohe_ss, y_train, cv = 3).mean()

-3.5006369899078746e+28

### Ridge/Lasso

#### Ridge

In [62]:
ridge = Ridge(alpha=4.0, random_state=42)


ridge.fit(X_train_ohe_ss, y_train)
print(ridge.score(X_train_ohe_ss, y_train))
print(mean_absolute_error(y_train, ridge.predict(X_train_ohe_ss)))

0.2908476345447051
0.5129955466313806


In [59]:
cross_val_score(ridge, X_train_ohe_ss, y_train, cv = 3).mean()

0.26337353039064865

#### Lasso

In [61]:
lasso = Lasso(alpha=4.0, random_state=42)


lasso.fit(X_train_ohe_ss, y_train)
print(lasso.score(X_train_ohe_ss, y_train))
print(mean_absolute_error(y_train, lasso.predict(X_train_ohe_ss)))

0.0
0.6411120096338159


In [43]:
cross_val_score(lasso, X_train_ohe_ss, y_train, cv = 3).mean()

-0.0002905870588070976

### Decision Tree Regressor

In [50]:
dtr = DecisionTreeRegressor(random_state=42, max_depth=30, max_features=500, min_samples_split=10, max_leaf_nodes=200)

dtr.fit(X_train_ohe, y_train)

print(dtr.score(X_train_ohe, y_train))
print(mean_absolute_error(y_train, dtr.predict(X_train_ohe_ss)))

0.9972901128002867
30.37309024390244


In [51]:
cross_val_score(dtr, X_train_ohe_ss, y_train, cv = 3).mean()

0.9882157862186065

In [56]:
dtr.score(X_test_ohe_ss, y_test)

-177.216878147633

In [65]:
param_grid={'max_depth':[10, 30 ,50],
            'max_features':[100, 300, 500],
            'min_samples_split':[10, 50, 100],
            'max_leaf_nodes': [50, 100, 200],
            'min_samples_leaf':[1,10,50]}

gridsearch = GridSearchCV(dtr, param_grid, cv=3)

gridsearch.fit(X_train_ohe, y_train);

gridsearch.best_params_

{'max_depth': 30,
 'max_features': 500,
 'max_leaf_nodes': 200,
 'min_samples_leaf': 1,
 'min_samples_split': 10}

### KNeighbors Regressor

In [30]:
knr = KNeighborsRegressor(random_state=42)

knr.fit(X_train_ohe, y_train)

print(knr.score(X_train_ohe, y_train))
print(mean_absolute_error(y_train, knr.predict(X_train_ohe_ss)))

0.9970241831011339
8.285319512195125


In [31]:
cross_val_score(knr, X_train_ohe_ss, y_train, cv = 3).mean()

0.09920080527554069

In [68]:
param_grid={'n_neighbors':[100, 500 ,1000],
            'leaf_size':[30, 100, 300],
            'p':[1, 2]
            }

gridsearch = GridSearchCV(knr, param_grid, cv=3)

gridsearch.fit(X_train_ohe, y_train);

gridsearch.best_params_

{'leaf_size': 300, 'n_neighbors': 100, 'p': 2}

### Random Forest Regressor

In [34]:
rfr = RandomForestRegressor(random_state=42)

rfr.fit(X_train_ohe, y_train)

print(rfr.score(X_train_ohe, y_train))
print(mean_absolute_error(y_train, rfr.predict(X_train_ohe_ss)))

0.9995676149742789
30.86648402439026


In [35]:
cross_val_score(rfr, X_train_ohe_ss, y_train, cv = 3).mean()

0.989592293256953

### Adaboost/Gradient Boost Regressors

#### Adaboost 

In [36]:
abr = AdaBoostRegressor(random_state=42)

abr.fit(X_train_ohe, y_train)

print(abr.score(X_train_ohe, y=y_train))
print(mean_absolute_error(y_train, abr.predict(X_train_ohe_ss)))

0.9469413874240099
35.224022357723406


In [37]:
cross_val_score(rfr, X_train_ohe_ss, y_train, cv = 3).mean()

0.989592293256953

#### Gradient Boost

In [38]:
gbr = GradientBoostingRegressor(random_state=42)

gbr.fit(X_train_ohe, y=y_train)

print(gbr.score(X_train_ohe, y=y_train))
print(mean_absolute_error(y_train, gbr.predict(X_train_ohe_ss)))

0.9999368011152284
35.11892170239581


In [39]:
cross_val_score(rfr, X_train_ohe_ss, y_train, cv = 3).mean()

0.989592293256953