In [64]:
import numpy as np
from sklearn import datasets, linear_model, preprocessing
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.model_selection import train_test_split
import pandas

if __name__ == "__main__":
  df = pandas.read_csv("apartmentComplexData.txt", names=["col1", "col2", "complexAge", "totalRooms", "totalBedrooms", "complexInhabitants", "apartmentsNr", "col8", "medianCompexValue"])

  X = df.iloc[:, 2:7]
  Y = df.iloc[:, 8]

  X_train, X_test, y_train, y_test = train_test_split(X, Y, test_size=0.2)

  # Ordinary Least Squares
  regr = linear_model.LinearRegression()
  regr.fit(X_train, y_train)

  prediction = regr.predict(X_test)
  print("ORDINARY LEAST SQUARES")
  a = pandas.DataFrame({ "Prediction": prediction, "Actual": y_test})
  print(a.head())
  print('Score: %.2f' % regr.score(X_test, y_test))
  

ORDINARY LEAST SQUARES
          Prediction    Actual
17640  242900.872577  255600.0
17153  230210.234899  500001.0
13693  157827.367250   81600.0
19827  156924.459396   67400.0
13795  148116.843598   69500.0
Score: 0.14


In [65]:
import numpy as np
from sklearn import datasets, linear_model, preprocessing
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.model_selection import train_test_split
import pandas

if __name__ == "__main__":
  df = pandas.read_csv("apartmentComplexData.txt", names=["IGNORED1", "IGNORED2", "complexAge", "totalRooms", "totalBedrooms", "complexInhabitants", "apartmentsNr", "IGNORED8", "medianCompexValue"])
  df = df.drop(["IGNORED1", "IGNORED2", "IGNORED8"], axis=1)

  Q1 = df.quantile(0.25)
  Q3 = df.quantile(0.75)
  IQR = Q3 - Q1
  df = df[(df > (Q1 - 1.5 * IQR)) & (df < (Q3 + 1.5 * IQR))]
  df = df.dropna()

  X = df.iloc[:, 0:5]
  Y = df.iloc[:, 5]

  X = preprocessing.normalize(X, norm='l2')
  # Y = preprocessing.normalize(Y, norm='l2')
  X_train, X_test, y_train, y_test = train_test_split(X, Y, test_size=0.2)

  # Ordinary Least Squares
  regr = linear_model.LinearRegression()
  regr.fit(X_train, y_train)

  prediction = regr.predict(X_test)
  print("ORDINARY LEAST SQUARES")
  a = pandas.DataFrame({ "Prediction": prediction, "Actual": y_test})
  print(a.head())
  print('Score: %.2f' % regr.score(X_test, y_test))
  

ORDINARY LEAST SQUARES
          Prediction    Actual
4791   168460.948829  125700.0
17046  242606.604408  451300.0
11370  139786.334333  218800.0
8658   230132.269697  366200.0
16491  164727.233284  156500.0
Score: 0.17


In [67]:
import numpy as np
from sklearn import datasets, linear_model, preprocessing, svm
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.model_selection import train_test_split
import pandas
from sklearn.neural_network import MLPRegressor

if __name__ == "__main__":
  df = pandas.read_csv("apartmentComplexData.txt", names=["IGNORED1", "IGNORED2", "complexAge", "totalRooms", "totalBedrooms", "complexInhabitants", "apartmentsNr", "IGNORED8", "medianCompexValue"])
  df = df.drop(["IGNORED1", "IGNORED2", "IGNORED8"], axis=1)

  X = df.iloc[:, 0:5]
  Y = df.iloc[:, 5]

  X = preprocessing.normalize(X, norm='l2')
  X_train, X_test, y_train, y_test = train_test_split(X, Y, test_size=0.2)
  
  # Ordinary Least Squares
  regr = linear_model.LinearRegression()
  regr.fit(X_train, y_train)

  prediction = regr.predict(X_test)
  print("ORDINARY LEAST SQUARES")
  a = pandas.DataFrame({ "Prediction": prediction, "Actual": y_test})
  print(a.head())
  print('Score: %.2f' % regr.score(X_test, y_test))
  print("===================================")

  # Ridged regression
  regr = linear_model.Ridge()
  regr.fit(X_train, y_train)

  prediction = regr.predict(X_test)
  print("RIDGED REGRESSION")
  a = pandas.DataFrame({ "Prediction": prediction, "Actual": y_test})
  print(a.head())
  print('Score: %.2f' % regr.score(X_test, y_test))
  print("===================================")

  # Lasso regression
  regr = linear_model.Lasso()
  regr.fit(X_train, y_train)

  prediction = regr.predict(X_test)
  print("LASSO")
  example_prediction = regr.predict([[41.0, 880.0, 129.0, 322.0, 126.0]])
  a = pandas.DataFrame({ "Prediction": prediction, "Actual": y_test})
  print(a.head())
  print('Score: %.2f' % regr.score(X_test, y_test))
  print("===================================")

  # LASSO LARS
  regr = linear_model.LassoLars()
  regr.fit(X_train, y_train)

  prediction = regr.predict(X_test)
  print("ELASTIC")
  a = pandas.DataFrame({ "Prediction": prediction, "Actual": y_test})
  print(a.head())
  print('Score: %.2f' % regr.score(X_test, y_test))
  print("===================================")

  regr = MLPRegressor(random_state=1, max_iter=500)
  regr.fit(X_train, y_train)
  prediction = regr.predict(X_test)
  print("ELASTIC")
  a = pandas.DataFrame({ "Prediction": prediction, "Actual": y_test})
  print(a.head())
  print('Score: %.2f' % regr.score(X_test, y_test))
  print("===================================")


ORDINARY LEAST SQUARES
          Prediction    Actual
10377  281295.609685  436600.0
5303   295689.975731  500001.0
15159  200227.053704  165800.0
6466   202028.138275  248900.0
1732   153933.915991  113600.0
Score: 0.22
RIDGED REGRESSION
          Prediction    Actual
10377  276250.069136  436600.0
5303   287590.937811  500001.0
15159  202753.079524  165800.0
6466   201966.386195  248900.0
1732   165309.767266  113600.0
Score: 0.22
LASSO
          Prediction    Actual
10377  281203.297690  436600.0
5303   295536.546290  500001.0
15159  200276.427891  165800.0
6466   202045.591353  248900.0
1732   154117.718303  113600.0
Score: 0.22
ELASTIC
          Prediction    Actual
10377  280454.465898  436600.0
5303   294240.986031  500001.0
15159  200751.866035  165800.0
6466   202453.519222  248900.0
1732   155245.753203  113600.0
Score: 0.22
ELASTIC
          Prediction    Actual
10377  194069.473175  436600.0
5303   198284.292582  500001.0
15159  202822.369833  165800.0
6466   203610.814241 

