In [181]:
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import os
import seaborn as sns
from scipy import stats
from sklearn import metrics
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split, cross_val_score

pd.options.display.max_columns = 999  # Allos us to display all columns
pd.options.display.max_rows = 999  # Allos us to display all columns

df_train = pd.read_csv('./datasets/cleaned_train.csv',keep_default_na=False)
df_test = pd.read_csv('./datasets/cleaned_test.csv',keep_default_na=False)

df_test.columns = df_test.columns.str.lower() # column names to lower case

In [182]:
df_train['electrical'].value_counts()

5    1868
4     140
3      35
2       7
1       1
Name: electrical, dtype: int64

In [183]:
df_test['electrical'].value_counts()

5.0    813
4.0     48
3.0     15
2.0      1
0.0      1
Name: electrical, dtype: int64

In [184]:
df_test[df_test['electrical']=='']

Unnamed: 0,id,pid,ms subclass,lot frontage,lot area,street,lot shape,utilities,land slope,overall qual,overall cond,year built,year remod/add,mas vnr area,exter qual,exter cond,bsmt qual,bsmt cond,bsmtfin type 1,bsmtfin sf 1,bsmtfin type 2,bsmtfin sf 2,bsmt unf sf,total bsmt sf,heating qc,central air,electrical,1st flr sf,2nd flr sf,low qual fin sf,gr liv area,bsmt full bath,bsmt half bath,full bath,half bath,bedroom abvgr,kitchen abvgr,kitchen qual,totrms abvgrd,functional,fireplaces,fireplace qu,garage yr blt,garage finish,garage cars,garage area,garage qual,garage cond,paved drive,wood deck sf,open porch sf,enclosed porch,3ssn porch,screen porch,pool area,pool qc,fence,misc feature,misc val,mo sold,yr sold,neighborhood_blmngtn,neighborhood_blueste,neighborhood_brdale,neighborhood_brkside,neighborhood_clearcr,neighborhood_collgcr,neighborhood_crawfor,neighborhood_edwards,neighborhood_gilbert,neighborhood_greens,neighborhood_idotrr,neighborhood_meadowv,neighborhood_mitchel,neighborhood_names,neighborhood_npkvill,neighborhood_nwames,neighborhood_noridge,neighborhood_nridght,neighborhood_oldtown,neighborhood_swisu,neighborhood_sawyer,neighborhood_sawyerw,neighborhood_somerst,neighborhood_stonebr,neighborhood_timber,neighborhood_veenker,condition 1_artery,condition 1_feedr,condition 1_norm,condition 1_posa,condition 1_posn,condition 1_rrae,condition 1_rran,condition 1_rrne,condition 1_rrnn,condition 2_feedr,condition 2_norm,condition 2_posa,bldg type_1fam,bldg type_2fmcon,bldg type_duplex,bldg type_twnhs,bldg type_twnhse,house style_1.5fin,house style_1.5unf,house style_1story,house style_2.5fin,house style_2.5unf,house style_2story,house style_sfoyer,house style_slvl,alley_grvl,alley_na,alley_pave,bsmt exposure_av,bsmt exposure_gd,bsmt exposure_mn,bsmt exposure_na,bsmt exposure_no,exterior 1st_asbshng,exterior 1st_asphshn,exterior 1st_brkcomm,exterior 1st_brkface,exterior 1st_cemntbd,exterior 1st_hdboard,exterior 1st_metalsd,exterior 1st_plywood,exterior 1st_precast,exterior 1st_stucco,exterior 1st_vinylsd,exterior 1st_wd sdng,exterior 1st_wdshing,exterior 2nd_asbshng,exterior 2nd_asphshn,exterior 2nd_brk cmn,exterior 2nd_brkface,exterior 2nd_cblock,exterior 2nd_cmentbd,exterior 2nd_hdboard,exterior 2nd_imstucc,exterior 2nd_metalsd,exterior 2nd_other,exterior 2nd_plywood,exterior 2nd_precast,exterior 2nd_stucco,exterior 2nd_vinylsd,exterior 2nd_wd sdng,exterior 2nd_wd shng,foundation_brktil,foundation_cblock,foundation_pconc,foundation_slab,foundation_stone,foundation_wood,garage type_2types,garage type_attchd,garage type_basment,garage type_builtin,garage type_carport,garage type_detchd,garage type_na,heating_floor,heating_gasa,heating_gasw,heating_grav,land contour_bnk,land contour_hls,land contour_low,land contour_lvl,roof matl_compshg,roof matl_metal,roof matl_roll,roof matl_tar&grv,roof matl_wdshake,roof matl_wdshngl,roof style_flat,roof style_gable,roof style_gambrel,roof style_hip,roof style_mansard,roof style_shed,sale type_cod,sale type_cwd,sale type_con,sale type_conld,sale type_conli,sale type_conlw,sale type_new,sale type_oth,sale type_vwd,sale type_wd,mas vnr type_brkcmn,mas vnr type_brkface,mas vnr type_cblock,mas vnr type_none,mas vnr type_stone,ms zoning_c (all),ms zoning_fv,ms zoning_i (all),ms zoning_rh,ms zoning_rl,ms zoning_rm,lot config_corner,lot config_culdsac,lot config_fr2,lot config_fr3,lot config_inside


In [256]:
features = [
    'overall qual','exter qual','gr liv area','kitchen qual','garage area',
    'garage cars','1st flr sf','bsmt qual','year built','garage finish',
    'year remod/add','fireplace qu','full bath','foundation_pconc','totrms abvgrd','mas vnr area',
    'fireplaces','heating qc','neighborhood_nridght','bsmtfin sf 1','bsmt exposure_gd','sale type_new','garage type_attchd',
    'exterior 1st_vinylsd','exterior 2nd_vinylsd','open porch sf','wood deck sf','lot frontage','mas vnr type_stone',
    'lot area','paved drive','garage qual','bsmt full bath','half bath','central air','roof style_hip','garage cond',
   'neighborhood_noridge','mas vnr type_brkface','neighborhood_stonebr',
    'mas vnr type_none','garage type_detchd',
    '2nd flr sf',
    'electrical',
    'ms zoning_rl','bsmt cond',
    'garage type_builtin','land contour_hls','house style_2story',
     'bsmt unf sf','exterior 1st_cemntbd','lot config_culdsac','exterior 2nd_cmentbd','neighborhood_somerst','bedroom abvgr',
     'alley_na','screen porch','bsmt exposure_av','functional','neighborhood_timber','condition 1_norm','condition 1_posn',
    
    'ms zoning_fv','bldg type_1fam','condition 2_posa','roof matl_wdshngl','heating_gasa',
    'neighborhood_veenker','neighborhood_collgcr','condition 1_posa','street','bldg type_twnhse','land slope',
    'neighborhood_crawfor','neighborhood_clearcr','3ssn porch','roof matl_wdshake','exterior 2nd_imstucc',
    
    'house style_2.5fin','exter cond','neighborhood_nwames','sale type_con','land contour_low','mo sold',
    #'bsmt exposure_mn','pool qc','utilities','exterior 1st_brkface','neighborhood_blmngtn','neighborhood_gilbert',
    # 'pool area','exterior 1st_imstucc','sale type_conli','roof matl_membran','neighborhood_sawyerw','bsmtfin sf 2',
    # 'lot config_fr3','roof style_shed',
    
    
    #### negative coors
    'foundation_cblock','lot shape','bsmt exposure_no','ms zoning_rm','pid','roof style_gable','garage type_na',
    'foundation_brktil','neighborhood_oldtown','house style_1.5fin','exterior 1st_wd sdng',
    'neighborhood_names','neighborhood_idotrr','neighborhood_edwards','exterior 2nd_wd sdng','alley_grvl',
    'fence','bsmt exposure_na','exterior 1st_metalsd','exterior 2nd_metalsd','enclosed porch','neighborhood_brkside',
    'neighborhood_sawyer','exterior 1st_asbshng','ms zoning_c (all)','kitchen abvgr','condition 1_artery',
    'foundation_slab','condition 1_feedr','exterior 1st_hdboard','bldg type_twnhs',
    'bldg type_2fmcon','exterior 2nd_asbshng','sale type_cod','bldg type_duplex','exterior 2nd_hdboard','land contour_bnk',
     'overall cond','neighborhood_brdale','land contour_lvl','ms subclass','lot config_inside','neighborhood_swisu']




# ,'heating_grav','garage type_carport','roof matl_compshg'
# ,'house style_1.5unf','house style_sfoyer',
#     'exterior 2nd_wd shng','exterior 1st_stucco']
# 'sale type_conld','exterior 2nd_plywood','garage type_basment','condition 1_rrae',
#     'exterior 2nd_stucco','id','exterior 1st_wdshing','condition 2_feedr','exterior 2nd_brk cmn',
#     'neighborhood_npkvill','bsmt half bath'
# ,'house style_slvl','roof style_gambrel','low qual fin sf','exterior 1st_plywood',
#     'condition 2_artery','heating_gasw','mas vnr type_brkcmn','neighborhood_mitchel','sale type_conlw','exterior 2nd_cblock',
#     'exterior 1st_cblock','ms zoning_i (all)','ms zoning_rh']


X = df_train[features]
y=df_train['saleprice']

In [257]:
model = LinearRegression()
model.fit(X,y)

LinearRegression()

In [258]:
X_train, X_test, y_train, y_test = train_test_split(X,y, random_state=42) 
print( model.score(X_train, y_train))
print( model.score(X_test, y_test))

cross_val_score(model, X_train, y_train).mean()

0.8877303917722793
0.9154836097954397


0.8449271805059585

In [254]:
0.8934689139456496
0.9157827829566405
0.8442579805223513

0.8442579805223513

In [255]:
pred = model.predict(df_test[features])   # make predictions
df_test['saleprice'] = pred               # add predictions to the test data


KeyError: "['exterior 1st_stone'] not in index"

In [190]:
#  np.sqrt(metrics.mean_squared_error(y, pred)) 

In [191]:
submission = df_test[['id','saleprice']]

In [192]:
submission.set_index('id', inplace = True)

In [193]:
submission.to_csv('./datasets/submissions/submission10.csv')