In [4]:
import pandas as pd
import numpy as np

In [5]:
# Load the home-price records (columns: town, area, price) into a DataFrame.
df = pd.read_csv('homeprices2.csv')
df

Unnamed: 0,town,area,price
0,monroe township,2600,550000
1,monroe township,3000,565000
2,monroe township,3200,610000
3,monroe township,3600,680000
4,monroe township,4000,725000
5,west windsor,2600,585000
6,west windsor,2800,615000
7,west windsor,3300,650000
8,west windsor,3600,710000
9,robinsville,2600,575000


In [6]:
# List the distinct town names; each one will become a dummy column below.
df.town.unique()

array(['monroe township', 'west windsor', 'robinsville'], dtype=object)

In [7]:
# One-hot encode the town column: one 0/1 integer indicator column per town.
dummies = pd.get_dummies(df['town'], dtype=int)
dummies

Unnamed: 0,monroe township,robinsville,west windsor
0,1,0,0
1,1,0,0
2,1,0,0
3,1,0,0
4,1,0,0
5,0,0,1
6,0,0,1
7,0,0,1
8,0,0,1
9,0,1,0


In [8]:
# Place the indicator columns side by side with the original columns
# (column-wise concatenation, rows aligned on the shared index).
merged = pd.concat([df, dummies], axis=1)
merged

Unnamed: 0,town,area,price,monroe township,robinsville,west windsor
0,monroe township,2600,550000,1,0,0
1,monroe township,3000,565000,1,0,0
2,monroe township,3200,610000,1,0,0
3,monroe township,3600,680000,1,0,0
4,monroe township,4000,725000,1,0,0
5,west windsor,2600,585000,0,0,1
6,west windsor,2800,615000,0,0,1
7,west windsor,3300,650000,0,0,1
8,west windsor,3600,710000,0,0,1
9,robinsville,2600,575000,0,1,0


In [9]:
# Avoid the dummy variable trap: drop the raw text column and one indicator.
# 'west windsor' becomes the baseline town, represented by both remaining
# indicators being 0.
final = merged.drop(columns=['town', 'west windsor'])
final

Unnamed: 0,area,price,monroe township,robinsville
0,2600,550000,1,0
1,3000,565000,1,0
2,3200,610000,1,0
3,3600,680000,1,0
4,4000,725000,1,0
5,2600,585000,0,0
6,2800,615000,0,0
7,3300,650000,0,0
8,3600,710000,0,0
9,2600,575000,0,1


In [10]:
from sklearn.linear_model import LinearRegression
# Linear regression estimator with default settings; it is fitted in a later cell.
model = LinearRegression()

In [11]:
# Feature matrix: everything except the target column
# (area plus the two remaining town indicators).
X = final.drop(columns=['price'])
X

Unnamed: 0,area,monroe township,robinsville
0,2600,1,0
1,3000,1,0
2,3200,1,0
3,3600,1,0
4,4000,1,0
5,2600,0,0
6,2800,0,0
7,3300,0,0
8,3600,0,0
9,2600,0,1


In [12]:
# Target vector: the sale price the model should learn to predict.
y = final['price']
y

Unnamed: 0,price
0,550000
1,565000
2,610000
3,680000
4,725000
5,585000
6,615000
7,650000
8,710000
9,575000


In [13]:
# Fit the regression on area + town indicators against price.
model.fit(X=X, y=y)

In [14]:
# Predict the price of a 2800 sqft home in Monroe Township.
# Values follow X's column order: area, 'monroe township', 'robinsville'.
# The input is wrapped in a DataFrame carrying X's column names so it matches
# the feature names the model was fitted with; a bare nested list makes
# scikit-learn warn about missing feature names.
model.predict(pd.DataFrame([[2800, 1, 0]], columns=X.columns))  # 2800 sqft, Monroe Township



array([565089.22812299])

In [15]:
# Predict the price of a 2800 sqft home in Robinsville.
# Values follow X's column order: area, 'monroe township', 'robinsville'.
# The input is wrapped in a DataFrame carrying X's column names so it matches
# the feature names the model was fitted with; a bare nested list makes
# scikit-learn warn about missing feature names.
model.predict(pd.DataFrame([[2800, 0, 1]], columns=X.columns))  # 2800 sqft, Robinsville



array([590775.63964739])

In [16]:
# Predict the price of a 2800 sqft home in West Windsor.
# West Windsor is the dropped (baseline) town, so both town indicators are 0.
# Values follow X's column order: area, 'monroe township', 'robinsville'.
# The input is wrapped in a DataFrame carrying X's column names so it matches
# the feature names the model was fitted with; a bare nested list makes
# scikit-learn warn about missing feature names.
model.predict(pd.DataFrame([[2800, 0, 0]], columns=X.columns))  # 2800 sqft, West Windsor



array([605103.20361213])

In [17]:
from sklearn.preprocessing import LabelEncoder
# NOTE(review): `le` is not referenced by any later cell visible here; the
# encoding in this notebook is done with pd.get_dummies. Confirm it is needed.
le = LabelEncoder()

In [18]:
# Predictions: score every training row and store the result beside the actual price.
# X was built before this column existed, so the predictions do not leak into the
# features. Note that this mutates df in place: re-running the earlier concat cell
# after this one would carry predicted_price into merged, final and X.
df['predicted_price'] = model.predict(X)

In [19]:
# Show df with the new predicted_price column next to the actual price.
df

Unnamed: 0,town,area,price,predicted_price
0,monroe township,2600,550000,539709.739841
1,monroe township,3000,565000,590468.716405
2,monroe township,3200,610000,615848.204687
3,monroe township,3600,680000,666607.181251
4,monroe township,4000,725000,717366.157816
5,west windsor,2600,585000,579723.71533
6,west windsor,2800,615000,605103.203612
7,west windsor,3300,650000,668551.924317
8,west windsor,3600,710000,706621.15674
9,robinsville,2600,575000,565396.151365


In [20]:
# One-hot encode 'town' directly on the full frame: the town column is replaced
# by indicator columns prefixed with 'town_'. No dtype is passed here, so the
# indicators are shown as True/False rather than the 0/1 produced by the
# earlier dtype=int call.
df_encoded = pd.get_dummies(df, columns=['town'])


In [22]:
# Display the frame with 'town' expanded into town_* indicator columns.
df_encoded

Unnamed: 0,area,price,predicted_price,town_monroe township,town_robinsville,town_west windsor
0,2600,550000,539709.739841,True,False,False
1,3000,565000,590468.716405,True,False,False
2,3200,610000,615848.204687,True,False,False
3,3600,680000,666607.181251,True,False,False
4,4000,725000,717366.157816,True,False,False
5,2600,585000,579723.71533,False,False,True
6,2800,615000,605103.203612,False,False,True
7,3300,650000,668551.924317,False,False,True
8,3600,710000,706621.15674,False,False,True
9,2600,575000,565396.151365,False,True,False


In [24]:
# One-hot encode the numeric 'area' column: every distinct area value becomes
# its own indicator column (area_2600, area_2800, ...).
# NOTE(review): area is used directly as a numeric feature in the regression
# above; this looks like an exploration of get_dummies, not model input. Confirm.
df_encoded_area = pd.get_dummies(df, columns=['area'])

In [25]:
# Display the frame with 'area' expanded into area_* indicator columns.
df_encoded_area

Unnamed: 0,town,price,predicted_price,area_2600,area_2800,area_2900,area_3000,area_3100,area_3200,area_3300,area_3600,area_4000
0,monroe township,550000,539709.739841,True,False,False,False,False,False,False,False,False
1,monroe township,565000,590468.716405,False,False,False,True,False,False,False,False,False
2,monroe township,610000,615848.204687,False,False,False,False,False,True,False,False,False
3,monroe township,680000,666607.181251,False,False,False,False,False,False,False,True,False
4,monroe township,725000,717366.157816,False,False,False,False,False,False,False,False,True
5,west windsor,585000,579723.71533,True,False,False,False,False,False,False,False,False
6,west windsor,615000,605103.203612,False,True,False,False,False,False,False,False,False
7,west windsor,650000,668551.924317,False,False,False,False,False,False,True,False,False
8,west windsor,710000,706621.15674,False,False,False,False,False,False,False,True,False
9,robinsville,575000,565396.151365,True,False,False,False,False,False,False,False,False


In [26]:
# One-hot encode the numeric 'price' column: every distinct price becomes its
# own indicator column (price_550000, price_565000, ...).
# NOTE(review): price is the regression target above; this looks like an
# exploration of get_dummies rather than a modelling step. Confirm.
df_encoded_price = pd.get_dummies(df, columns=['price'])
df_encoded_price

Unnamed: 0,town,area,predicted_price,price_550000,price_565000,price_575000,price_585000,price_600000,price_610000,price_615000,price_620000,price_650000,price_680000,price_695000,price_710000,price_725000
0,monroe township,2600,539709.739841,True,False,False,False,False,False,False,False,False,False,False,False,False
1,monroe township,3000,590468.716405,False,True,False,False,False,False,False,False,False,False,False,False,False
2,monroe township,3200,615848.204687,False,False,False,False,False,True,False,False,False,False,False,False,False
3,monroe township,3600,666607.181251,False,False,False,False,False,False,False,False,False,True,False,False,False
4,monroe township,4000,717366.157816,False,False,False,False,False,False,False,False,False,False,False,False,True
5,west windsor,2600,579723.71533,False,False,False,True,False,False,False,False,False,False,False,False,False
6,west windsor,2800,605103.203612,False,False,False,False,False,False,True,False,False,False,False,False,False
7,west windsor,3300,668551.924317,False,False,False,False,False,False,False,False,True,False,False,False,False
8,west windsor,3600,706621.15674,False,False,False,False,False,False,False,False,False,False,False,True,False
9,robinsville,2600,565396.151365,False,False,True,False,False,False,False,False,False,False,False,False,False
