In [1]:
import pandas as pd
pd.set_option('display.max_rows', 500)
import numpy as np
import matplotlib.pyplot as plt

df = pd.read_csv("https://raw.githubusercontent.com/ryanleeallred/datasets/master/Ames%20Housing%20Data/train.csv")


In [2]:
def engineer_me(df):
    
    # random
    df['fullhalfbath_interaction'] = df.FullBath * df.HalfBath
    df['floor_interaction'] = df['1stFlrSF'] * df['2ndFlrSF']
    df['lotarea_squared'] = df.LotArea ** 2
    df['overallqual_squared'] = df.OverallQual ** 2
    df['overallcond_squared'] = df.OverallCond ** 2
    df['sq_footage_fsb'] = df['1stFlrSF'] + df['2ndFlrSF'] + df.BsmtFinSF1
    df['sqfootage_fsb_squared'] = df.sq_footage_fsb ** 2
    df['fullbath_squared'] = df.FullBath ** 2
    df['grlivarea_squared'] = df.GrLivArea ** 2
    df['atypical_function'] = (df.Functional != 'Typ')
    df.LotFrontage = df.LotFrontage.fillna(70)
    df['lotfrontage_squared'] = df.LotFrontage ** 2
    df['OpenPorchSF_squared'] = df.OpenPorchSF ** 2
    df['WoodDeckSF_squared'] = df.WoodDeckSF ** 2
    df['EnclosedPorch_squared'] = df.EnclosedPorch ** 2
    df['ScreenPorch_squared'] = df.ScreenPorch ** 2
    df['TotRmsAbvGrd_squared'] = df.TotRmsAbvGrd ** 2
    df['LowQualFinSF_squared'] = df.LowQualFinSF ** 2
    df['LowQualFinSF_as_pct_fsb'] = df.LowQualFinSF / df.sq_footage_fsb
    df['ExterQual_numeric'] = df.ExterQual.map({'Po':0, 'Fa':1, 'TA':2, 'Gd':3, 'Ex':4})
    df['ExterQual_numeric_squared'] = df.ExterQual_numeric ** 2
    df['street_grvl'] = (df.Street == 'Grvl')
    df['LotConfig_Culdasac'] = (df.LotConfig == 'CullDSac')
    
    
    # dates
    df['sold_pre_2008'] = (df.YrSold < 2008)
    df['age_sold'] = df.YrSold - df.YearBuilt
    df['age_sold_squared'] = df.age_sold ** 2
    df['is_start_of_school'] = (df.MoSold == 9)
    df['is_spring'] = ((df.MoSold == 4) | (df.MoSold == 5))
    df['years_since_remod'] = df.YrSold - df.YearRemodAdd
    df['years_since_remod_squared'] = df.YrSold - df.YearRemodAdd
    df['age_remod_interaction'] = df.age_sold * df.years_since_remod
    df['age_remod_interaction_squared'] = df.age_sold_squared * df.years_since_remod_squared
    df['remodeled?'] = (df.YearBuilt != df.YearRemodAdd)
    df['age_remodeled_interaction_good'] = (df.age_sold * df['remodeled?']) 
    df['overallcond_5over'] = (df.OverallCond >= 5)
    df['sq_footage_fsbg'] = df['sq_footage_fsb'] + df.GarageArea
    df['sq_footage_fsbg_squared'] = df.sq_footage_fsbg ** 2 
    
    
    # MSZoning - General Zoning classification of the sale
    df['mszoning_RL'] = (df.MSZoning == 'RL')
    df['mszoning_c'] = (df.MSZoning == 'C (all)')
    df['mszoning_RM'] = (df.MSZoning == 'RM')
    df['mszoning_FV'] = (df.MSZoning == 'FV')
    
    
    # Home Location 
    df['good_area'] = ((df.Neighborhood == 'NoRidge') | (df.Neighborhood == 'NridgeHt') | (df.Neighborhood == 'StoneBr'))
    df['bad_area'] = ((df.Neighborhood == 'MeadowV') | (df.Neighborhood == 'IDOTRR') | (df.Neighborhood == 'BrDale'))
    
    
    # Home Type
    df['one_fam_home'] = (df.BldgType == '1Fam')
    df['townhouse'] = (df.BldgType == 'TwnhsE')
    df['onehalf_fin'] = (df.HouseStyle == '1.5Fin')
    df['onehalf_unfin'] = (df.HouseStyle == '1.5Unf')

    
    # MiscFeature
    df['miscfeature_isnone'] = (df.MiscFeature == 'None')
    df['has_shed'] = (df.MiscFeature == 'Shed')
    df['has_pool'] = (df.PoolArea > 0)
    df['pool_exc'] = (df.PoolQC == 'Ex')
    
    # Need to add MiscFeature Values
    
    
    # SaleType & SaleCondition
    df['new_home'] = (df.SaleType == 'New')
    df['saletype_cod'] = (df.SaleType == 'COD')
    df['saletype_lowdn_lowint'] = ((df.SaleType == 'ConLD') | (df.SaleType == 'ConLw'))
    df['salecond_partial'] = (df.SaleCondition == 'Partial')
    df['salecond_abnorml'] = (df.SaleCondition == 'Abnorml')
    df['salecond_family'] = (df.SaleCondition == 'Family')
    
    # Garage
    df['attached_garage'] = (df.GarageType == 'Attchd')
    df['detchd_garage'] = (df.GarageType == 'Detchd')
    df['garage_cars_squared'] = df.GarageCars ** 2
    df['garagearea_squared'] = df.GarageArea ** 2
    df.GarageQual = df.GarageQual.fillna('missing')
    df['garageQual_numeric'] = df.GarageQual.map({'missing': 0, 'Po': 0, 'Fa': 1, 'TA': 2, 'Gd':3, 'Ex': 4})
    df['garageQual_numeric_squared'] = df.garageQual_numeric ** 2
    df['garage_unf'] = (df.GarageFinish == 'Unf')
    df['garage_Fin'] = (df.GarageFinish == 'Fin')
    df['Garage_Added'] = (df.GarageYrBlt != df.YearBuilt)
    
    
    # Heating & Air
    df['heating_gasa'] = (df.Heating == 'GasA')
    df['heating_qual_ex'] = (df.HeatingQC == 'Ex')
    df['heating_qual_pr'] = (df.HeatingQC == 'Po')
    df['heating_qual_fa'] = (df.HeatingQC == 'Fa') # maybe make heating quality numeric and square it?
    df['no_central_air'] = (df.CentralAir == 'N')
    
    
    # Basement
    df['bsmt_unfin_percent'] = df.BsmtUnfSF / df.TotalBsmtSF
    df.bsmt_unfin_percent = df.bsmt_unfin_percent.fillna(1)
    df['finished_bsmt'] = (df.bsmt_unfin_percent == 0)
    df.BsmtExposure = df.BsmtExposure.fillna('NoBsmt')
    df['BsmtExposure_numeric'] = df.BsmtExposure.map({'NoBsmt': 0, 'No': 1, 'Mn':2, 'Av':3, 'Gd':4})
    df['BsmtFinSF1_squared'] = df.BsmtFinSF1 ** 2
    
    
    # Proximity to various conditions
    df['pos1_feedr_art'] = ((df.Condition1 == 'Feedr') | (df.Condition1 == 'Artery'))
    df['pos2_feedr_art'] = ((df.Condition2 == 'Feedr') | (df.Condition2 == 'Artery'))
    df['pos1_pos2_sum1'] = (df.pos1_feedr_art + df.pos2_feedr_art == 1)
    df['pos1_pos2_sum2'] = (df.pos1_feedr_art + df.pos2_feedr_art == 2)
    
    
    # Kitchen Stuff
    df['kitchen_numeric'] = df.KitchenQual.map({'Po': 1, 'Fa':2, 'TA':3, 'Gd':4, 'Ex':5})
    df['kitchen_numeric_squared'] = df.kitchen_numeric ** 2
    
    
    # Home Exterior / Roof
    df['exterior_exc'] = (df.ExterQual == 'Ex')
    df['RoofStyle_Hip'] = (df.RoofStyle == 'Hip')
    df['RoofStyle_Gambrel'] = (df.RoofStyle == 'Gambrel')
    df['RoofMatl_Wd'] = ((df.RoofMatl == 'WdShngl') | (df.RoofMatl == 'WdShake'))
    df['Exterior1st_VinylSd'] = (df.Exterior1st == 'VinylSd')
    df['Exterior1st_MetalSd'] = (df.Exterior1st == 'MetalSd')
    df['Exterior1st_WdSdng'] = ((df.Exterior1st == 'Wd Sdng') | (df.Exterior1st == 'WdShing'))
    df['Exterior1st_HdBoard'] = (df.Exterior1st == 'HdBoard')
    df['Exterior1st_CemntBd'] = (df.Exterior1st == 'CemntBd')
    df['Exterior1st_BrkFace'] = (df.Exterior1st == 'BrkFace')
    df['Exterior1st_AsbShng'] = (df.Exterior1st == 'AsbShng')

    
    # Fireplaces
    df['no_fireplace'] = (df.Fireplaces == 0)
    df['two_or_more_fireplaces'] = (df.Fireplaces >= 2)
    df.FireplaceQu = df.FireplaceQu.fillna('None')
    df['FireplaceQu_numeric'] = df.FireplaceQu.map({'None': 1, 'Po': 0, 'Fa':1, 'TA':2, 'Gd':3, 'Ex':4})
    df['FireplaceQu_numeric_squared'] = df.FireplaceQu_numeric ** 2
    
    
    # Electrical
    df['electrical_is_brkr'] = (df.Electrical == 'SBrkr')
    df['electrical_is_F'] = (df.Electrical == 'FuseF')
    df['electrical_is_A'] = (df.Electrical == 'FuseA')
    df['electrical_is_P'] = (df.Electrical == 'FuseP')
    
    # Land Shape / Foundation
    df = pd.get_dummies(df, prefix='Foundation_', columns=['Foundation'])
    df['banked_land'] = (df.LandContour == 'Bnk')
    df['HLS_land'] = (df.LandContour == 'HLS_land')
    df['landslope_mod_sev'] = (df.LandSlope != 'Gtl')
    df['land_interaction'] = df.HLS_land + df.landslope_mod_sev
    df['LotShape_Reg'] = (df.LotShape == 'Reg')
    
    #MasVnr
    df['MasVnrType_Stone'] = (df.MasVnrType == 'Stone')
    df['MasVnrType_BrkFace'] = (df.MasVnrType == 'BrkFace')
    df['MasVnrType_BrkCmn'] = (df.MasVnrType == 'BrkCmn')
    df.MasVnrArea = df.MasVnrArea.fillna(0)
    df['MasVnrArea_squared'] = df.MasVnrArea ** 2
    
    # Neighborhood
    df['Neighborhood_Crawfor'] = (df.Neighborhood == 'Crawfor')
    df['Neighborhood_2'] = df.Neighborhood
    df = pd.get_dummies(df, prefix='Neighborhood', columns=['Neighborhood_2'])
    
    #driveway 
    
    df['paved_drive_numeric'] = df.PavedDrive.map({'Y': 2, 'P': 1, 'N': 0})
    df['paved_drive_squared'] = df.paved_drive_numeric ** 2
    
    #Fence
    df.Fence = df.Fence.fillna('None')
    df['TotRmsAbvGrd_perGrLivArea'] = df.TotRmsAbvGrd / df.GrLivArea
    df['total_bathrooms'] = (df.BsmtFullBath + df.BsmtHalfBath + df.FullBath + df.HalfBath)
    df['total_bathrooms_squared'] = df.total_bathrooms ** 2
    
    df.Alley = df.Alley.fillna('None')
    df.Alley = pd.factorize(df.Alley)[0]
    return df


In [3]:
features = ['LotArea','OverallQual', 'OverallCond', 'GrLivArea','GarageArea', 'FullBath', 'HalfBath', 'fullhalfbath_interaction', 'BedroomAbvGr',
            'sold_pre_2008', 'age_sold','age_sold_squared', 'years_since_remod', 'age_remod_interaction', '1stFlrSF', '2ndFlrSF', 'floor_interaction', 'mszoning_RL', 'mszoning_c',
            'mszoning_RM', 'mszoning_FV', 'good_area', 'bad_area', 'one_fam_home', 'townhouse', 'miscfeature_isnone', 'has_shed', 'new_home', 'attached_garage',
            'detchd_garage', 'lotarea_squared', 'overallqual_squared', 'overallcond_squared', 'is_start_of_school', 'is_spring', 'remodeled?', 'age_remodeled_interaction_good',
            'sq_footage_fsb', 'sqfootage_fsb_squared', 'grlivarea_squared', 'heating_qual_ex', 'heating_qual_pr', 'heating_qual_fa', 'finished_bsmt', 'pos1_feedr_art', 'pos2_feedr_art',
            'pos1_pos2_sum1', 'pos1_pos2_sum2', 'kitchen_numeric_squared', 'garage_cars_squared', 'atypical_function', 'exterior_exc', 'salecond_abnorml', 'no_fireplace', 'lotfrontage_squared',
            'garagearea_squared', 'sq_footage_fsbg', 'banked_land', 'HLS_land', 'salecond_family', 'paved_drive_numeric', 'paved_drive_squared', 'OpenPorchSF', 'WoodDeckSF', 'EnclosedPorch',
            'ScreenPorch', 'garageQual_numeric_squared', 'garage_unf', 'garage_Fin', 'no_central_air', 'Foundation__CBlock', 'Foundation__PConc', 'Foundation__Slab', 'Foundation__Stone', 'Foundation__Wood',
            'RoofStyle_Hip', 'RoofStyle_Gambrel', 'RoofMatl_Wd','TotRmsAbvGrd_squared', 'LowQualFinSF', 'LowQualFinSF_squared', 'Exterior1st_VinylSd', 'Exterior1st_MetalSd', 'Exterior1st_WdSdng', 'MasVnrType_Stone',
            'MasVnrType_BrkCmn', 'street_grvl', 'Neighborhood_Crawfor', 'FireplaceQu_numeric_squared', 'Garage_Added', 'LotShape_Reg', 'TotRmsAbvGrd_perGrLivArea', 'total_bathrooms', 'total_bathrooms_squared']


In [4]:
df = engineer_me(df)

  .format(op=op_str, alt_op=unsupported[op_str]))


In [5]:
from sklearn.linear_model import LinearRegression
from sklearn.linear_model import Ridge
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, r2_score, mean_absolute_error

X = df[features].values
y = np.log(df['SalePrice'])

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=.2)

lr = LinearRegression()
lr.fit(X_train, y_train)

mse = mean_squared_error(lr.predict(X_test), y_test)
rmse = np.sqrt(mse)
print(rmse)

0.10438510658530134


In [6]:
from sklearn.linear_model import LinearRegression
from sklearn.linear_model import Ridge
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, r2_score, mean_absolute_error

X = df[features].values
y = np.log(df['SalePrice'])

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=.2)

lr = Ridge(solver='svd')
lr.fit(X_train, y_train)

mse = mean_squared_error(lr.predict(X_test), y_test)
rmse = np.sqrt(mse)
print(rmse)

0.12787881640716028


In [7]:
df 

Unnamed: 0,Id,MSSubClass,MSZoning,LotFrontage,LotArea,Street,Alley,LotShape,LandContour,Utilities,...,Neighborhood_SawyerW,Neighborhood_Somerst,Neighborhood_StoneBr,Neighborhood_Timber,Neighborhood_Veenker,paved_drive_numeric,paved_drive_squared,TotRmsAbvGrd_perGrLivArea,total_bathrooms,total_bathrooms_squared
0,1,60,RL,65.0,8450,Pave,0,Reg,Lvl,AllPub,...,0,0,0,0,0,2,4,0.004678,4,16
1,2,20,RL,80.0,9600,Pave,0,Reg,Lvl,AllPub,...,0,0,0,0,1,2,4,0.004754,3,9
2,3,60,RL,68.0,11250,Pave,0,IR1,Lvl,AllPub,...,0,0,0,0,0,2,4,0.003359,4,16
3,4,70,RL,60.0,9550,Pave,0,IR1,Lvl,AllPub,...,0,0,0,0,0,2,4,0.004077,2,4
4,5,60,RL,84.0,14260,Pave,0,IR1,Lvl,AllPub,...,0,0,0,0,0,2,4,0.004095,4,16
5,6,50,RL,85.0,14115,Pave,0,IR1,Lvl,AllPub,...,0,0,0,0,0,2,4,0.003671,3,9
6,7,20,RL,75.0,10084,Pave,0,Reg,Lvl,AllPub,...,0,1,0,0,0,2,4,0.004132,3,9
7,8,60,RL,70.0,10382,Pave,0,IR1,Lvl,AllPub,...,0,0,0,0,0,2,4,0.003349,4,16
8,9,50,RM,51.0,6120,Pave,0,Reg,Lvl,AllPub,...,0,0,0,0,0,2,4,0.004510,2,4
9,10,190,RL,50.0,7420,Pave,0,Reg,Lvl,AllPub,...,0,0,0,0,0,2,4,0.004643,2,4


In [8]:
supplemental_df = pd.read_csv('MAs_and_House_starts.csv')

In [9]:
supplemental_df

Unnamed: 0,Mo_num,Houses_Sold,Ames_MA_3,Ames_MA_6,Housing_Starts,Nat_MA_3,Nat_MA_6
0,1,10,10.0,10.0,2273,2273.0,2273.0
1,2,9,9.5,9.5,2119,2196.0,2196.0
2,3,25,14.666667,14.666667,1969,2120.333333,2120.333333
3,4,27,20.333333,17.75,1821,1969.666667,2045.5
4,5,38,30.0,21.8,1942,1910.666667,2024.8
5,6,48,37.666667,26.166667,1802,1855.0,1987.666667
6,7,67,51.0,35.666667,1737,1827.0,1898.333333
7,8,23,46.0,38.0,1650,1729.666667,1820.166667
8,9,15,35.0,36.333333,1720,1702.333333,1778.666667
9,10,24,20.666667,35.833333,1491,1620.333333,1723.666667


In [10]:
df['Mo'] = 12 * (df['YrSold'] - 2006) + df['MoSold']


In [11]:
df['Mo'].describe()

count    1460.000000
mean       28.110959
std        15.771632
min         1.000000
25%        16.000000
50%        29.000000
75%        42.000000
max        55.000000
Name: Mo, dtype: float64

In [12]:
df['ho_sold'] = df['Mo']
df['Ames_3MMA'] = df['Mo']
df['Ames_6MMA'] = df['Mo']
df['Nat_ho_starts'] = df['Mo'] 
df['Nat_starts_3MMA'] = df['Mo'] 
df['Nat_starts_6MMA'] = df['Mo']

In [13]:
test_df = df

In [29]:
from collections import defaultdict


d_1 = defaultdict(list)

for idx, row in supplemental_df.iterrows():
    d_1[row['Mo_num']].append(row['Houses_Sold'])

d_1 = [{k: v[0]} for k, v in d_1.items()]

In [30]:
d_1

[{1.0: 10.0},
 {2.0: 9.0},
 {3.0: 25.0},
 {4.0: 27.0},
 {5.0: 38.0},
 {6.0: 48.0},
 {7.0: 67.0},
 {8.0: 23.0},
 {9.0: 15.0},
 {10.0: 24.0},
 {11.0: 16.0},
 {12.0: 12.0},
 {13.0: 13.0},
 {14.0: 8.0},
 {15.0: 23.0},
 {16.0: 23.0},
 {17.0: 43.0},
 {18.0: 59.0},
 {19.0: 51.0},
 {20.0: 40.0},
 {21.0: 11.0},
 {22.0: 16.0},
 {23.0: 24.0},
 {24.0: 18.0},
 {25.0: 13.0},
 {26.0: 10.0},
 {27.0: 18.0},
 {28.0: 26.0},
 {29.0: 38.0},
 {30.0: 51.0},
 {31.0: 49.0},
 {32.0: 29.0},
 {33.0: 17.0},
 {34.0: 22.0},
 {35.0: 17.0},
 {36.0: 14.0},
 {37.0: 12.0},
 {38.0: 10.0},
 {39.0: 19.0},
 {40.0: 26.0},
 {41.0: 37.0},
 {42.0: 59.0},
 {43.0: 61.0},
 {44.0: 30.0},
 {45.0: 20.0},
 {46.0: 27.0},
 {47.0: 22.0},
 {48.0: 15.0},
 {49.0: 10.0},
 {50.0: 15.0},
 {51.0: 21.0},
 {52.0: 39.0},
 {53.0: 48.0},
 {54.0: 36.0},
 {55.0: 6.0}]

In [37]:
test_dict_1 = {}
for d in d_1:
    test_dict_1.update(d)

In [38]:
test_dict_1

{1.0: 10.0,
 2.0: 9.0,
 3.0: 25.0,
 4.0: 27.0,
 5.0: 38.0,
 6.0: 48.0,
 7.0: 67.0,
 8.0: 23.0,
 9.0: 15.0,
 10.0: 24.0,
 11.0: 16.0,
 12.0: 12.0,
 13.0: 13.0,
 14.0: 8.0,
 15.0: 23.0,
 16.0: 23.0,
 17.0: 43.0,
 18.0: 59.0,
 19.0: 51.0,
 20.0: 40.0,
 21.0: 11.0,
 22.0: 16.0,
 23.0: 24.0,
 24.0: 18.0,
 25.0: 13.0,
 26.0: 10.0,
 27.0: 18.0,
 28.0: 26.0,
 29.0: 38.0,
 30.0: 51.0,
 31.0: 49.0,
 32.0: 29.0,
 33.0: 17.0,
 34.0: 22.0,
 35.0: 17.0,
 36.0: 14.0,
 37.0: 12.0,
 38.0: 10.0,
 39.0: 19.0,
 40.0: 26.0,
 41.0: 37.0,
 42.0: 59.0,
 43.0: 61.0,
 44.0: 30.0,
 45.0: 20.0,
 46.0: 27.0,
 47.0: 22.0,
 48.0: 15.0,
 49.0: 10.0,
 50.0: 15.0,
 51.0: 21.0,
 52.0: 39.0,
 53.0: 48.0,
 54.0: 36.0,
 55.0: 6.0}

In [39]:
d_1 = test_dict_1

In [40]:
# 3 month moving average
d_2 = defaultdict(list)

for idx, row in supplemental_df.iterrows():
    d_2[row['Mo_num']].append(row['Houses_Sold'])

d_2 = [{k: v} for k, v in d_2.items()]

# 6 month moving average

d_3 = defaultdict(list)

for idx, row in supplemental_df.iterrows():
    d_3[row['Mo_num']].append(row['Houses_Sold'])

d_3 = [{k: v} for k, v in d_3.items()]

# National Housing Starts

d_4 = defaultdict(list)

for idx, row in supplemental_df.iterrows():
    d_4[row['Mo_num']].append(row['Houses_Sold'])

d_4 = [{k: v} for k, v in d_4.items()]

# 3 month MA of national housting starts

d_5 = defaultdict(list)

for idx, row in supplemental_df.iterrows():
    d_5[row['Mo_num']].append(row['Houses_Sold'])

d_5 = [{k: v} for k, v in d_5.items()]

# 6 month MA of national housting starts

d_6 = defaultdict(list)

for idx, row in supplemental_df.iterrows():
    d_6[row['Mo_num']].append(row['Houses_Sold'])

d_6 = [{k: v} for k, v in d_6.items()]

unpacked_2 = {}
for d in d_2:
    unpacked_2.update(d)
    
unpacked_3 = {}
for d in d_3:
    unpacked_3.update(d)
    
unpacked_4 = {}
for d in d_4:
    unpacked_4.update(d)
    
unpacked_5 = {}
for d in d_5:
    unpacked_5.update(d)
    
unpacked_6 = {}
for d in d_6:
    unpacked_6.update(d)
    
    
    
d_2 = unpacked_2
d_3 = unpacked_3
d_4 = unpacked_4
d_5 = unpacked_5
d_6 = unpacked_6

In [41]:
test_df['ho_sold'].map(d_1)
test_df['Ames_3MMA'].map(d_2)
test_df['Ames_6MMA'].map(d_3)
test_df['Nat_ho_starts'].map(d_4)
test_df['Nat_starts_3MMA'].map(d_5)
test_df['Nat_starts_6MMA'].map(d_6)


#df['ho_sold'] = df['Mo']
#df['Ames_3MMA'] = df['Mo']
#df['Ames_6MMA'] = df['Mo']
#df['Nat_ho_starts'] = df['Mo'] 
#df['Nat_starts_3MMA'] = df['Mo'] 
#df['Nat_starts_6MMA'] = df['Mo']

0       [10.0]
1       [43.0]
2       [17.0]
3        [9.0]
4       [14.0]
5       [27.0]
6       [40.0]
7       [22.0]
8       [26.0]
9       [13.0]
10      [10.0]
11      [67.0]
12      [17.0]
13      [40.0]
14      [38.0]
15      [51.0]
16      [21.0]
17      [24.0]
18      [51.0]
19      [37.0]
20      [16.0]
21      [59.0]
22      [17.0]
23      [59.0]
24      [48.0]
25      [61.0]
26      [48.0]
27      [48.0]
28      [12.0]
29      [38.0]
30      [49.0]
31      [51.0]
32      [13.0]
33      [39.0]
34      [40.0]
35      [15.0]
36      [59.0]
37      [27.0]
38      [10.0]
39      [51.0]
40      [12.0]
41      [51.0]
42      [18.0]
43      [49.0]
44      [38.0]
45      [15.0]
46      [30.0]
47      [51.0]
48      [59.0]
49      [13.0]
50      [51.0]
51      [15.0]
52      [48.0]
53      [16.0]
54       [8.0]
55      [49.0]
56      [30.0]
57      [23.0]
58      [24.0]
59      [13.0]
60      [38.0]
61      [23.0]
62      [16.0]
63      [39.0]
64      [10.0]
65      [16.0]
66       [

In [42]:
test_df['Ames_3MMA']

0       26
1       17
2       33
3        2
4       36
5       46
6       20
7       47
8       28
9       25
10      26
11       7
12      33
13      20
14      29
15      19
16      51
17      10
18      30
19      41
20      11
21      18
22      33
23      18
24      53
25      43
26      53
27      53
28      12
29      29
30      31
31      30
32      25
33      52
34      20
35       9
36      42
37      46
38      49
39      30
40      12
41      19
42      24
43      31
44       5
45      50
46      44
47      19
48      42
49      13
50      19
51       9
52      53
53      11
54      14
55      31
56      44
57       8
58      10
59      25
60       5
61      15
62      22
63      52
64      38
65      22
66      55
67      18
68      54
69       7
70      14
71      18
72      48
73      53
74      53
75      47
76      28
77      25
78      52
79      41
80      42
81       3
82      34
83      19
84      41
85       4
86      39
87      42
88      46
89      20
90       7

In [44]:
print (df.shape)
print (test_df.shape)
df = test_df
print (df.shape)


(1460, 232)
(1460, 232)
(1460, 232)


In [45]:
df['Ames_3MMA']

0       26
1       17
2       33
3        2
4       36
5       46
6       20
7       47
8       28
9       25
10      26
11       7
12      33
13      20
14      29
15      19
16      51
17      10
18      30
19      41
20      11
21      18
22      33
23      18
24      53
25      43
26      53
27      53
28      12
29      29
30      31
31      30
32      25
33      52
34      20
35       9
36      42
37      46
38      49
39      30
40      12
41      19
42      24
43      31
44       5
45      50
46      44
47      19
48      42
49      13
50      19
51       9
52      53
53      11
54      14
55      31
56      44
57       8
58      10
59      25
60       5
61      15
62      22
63      52
64      38
65      22
66      55
67      18
68      54
69       7
70      14
71      18
72      48
73      53
74      53
75      47
76      28
77      25
78      52
79      41
80      42
81       3
82      34
83      19
84      41
85       4
86      39
87      42
88      46
89      20
90       7

In [None]:
# We now have the ultimate DF with the greatest degree of advanced 
# feature engineering the world has ever seen