In [118]:
import pandas as pd
import numpy as np
import random
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression

In [119]:
# Load the raw car listings from the CSV file (path is relative to the notebook's
# working directory) and preview the first rows.
df = pd.read_csv("data/turboaz.csv")
df.head()

Unnamed: 0,Sheher,Marka,Model,Buraxilish ili,Ban novu,Reng,Muherrikin hecmi,Muherrikin gucu,Yanacaq novu,Yurush,Suretler qutusu,Oturucu,Yeni,Qiymet,Extra Info,Seller comment
0,Sumqayıt,Mercedes,C 200,1999,Sedan,Qara,2.0 L,175 a.g.,Benzin,366 000 km,Avtomat,Arxa,Xeyr,12500 AZN,Kreditdədir,Barter mümkündür
1,Bakı,Mercedes,C 180,2014,Sedan,Qara,1.6 L,167 a.g.,Benzin,102 000 km,Avtomat,Arxa,Xeyr,31500 $,Barter mümkündür,"Yüngül lehimli disklər,ABS,Yağış sensoru,Mərkə..."
2,Sumqayıt,Mercedes,C 240,2002,Sedan,Qara,2.6 L,177 a.g.,Benzin,469 700 km,Avtomat,Arxa,Xeyr,11700 AZN,Kreditdədir,Barter mümkündür
3,Şirvan,Mercedes,C 180,1998,Sedan,Göy,1.8 L,125 a.g.,Benzin,556 680 km,Avtomat,Arxa,Xeyr,9700 AZN,"Yüngül lehimli disklər,ABS,Mərkəzi qapanma,Kon...",Mawin BAKIDADI. Tecili satilir yaxwi veziyetde...
4,Sumqayıt,Mercedes,C 220,2000,Sedan,Gümüşü,2.2 L,150 a.g.,Dizel,300 000 km,Avtomat,Arxa,Xeyr,12700 AZN,"Yüngül lehimli disklər,ABS,Mərkəzi qapanma,Par...",Vurugu udari deyisen detali curuyu qetiyyen yo...


In [120]:
# We will use the year, engine volume and mileage of the car to predict its price.
#
# Selection and renaming are chained and finished with an explicit copy, so the
# working frame is a new object that is independent of the raw one; the later
# cells assign converted columns into it.
df = (
    df[["Buraxilish ili", "Muherrikin hecmi", "Yurush", "Qiymet"]]
    .rename(
        columns={
            "Buraxilish ili": "Year",
            "Muherrikin hecmi": "Volume",
            "Yurush": "Mileage",
            "Qiymet": "Price",
        }
    )
    .copy()
)
df.head()

Unnamed: 0,Year,Volume,Mileage,Price
0,1999,2.0 L,366 000 km,12500 AZN
1,2014,1.6 L,102 000 km,31500 $
2,2002,2.6 L,469 700 km,11700 AZN
3,1998,1.8 L,556 680 km,9700 AZN
4,2000,2.2 L,300 000 km,12700 AZN


In [121]:
def volConverter(col):
    """Return the leading number of a volume string such as "2.0 L" as a float.

    The string is split on whitespace and only its first token is converted;
    anything after it (the unit) is ignored.
    """
    tokens = col.split()
    leading_number = tokens[0]
    return float(leading_number)

# Replace each textual engine volume with its numeric value, element by element.
df["Volume"] = df["Volume"].map(volConverter)

In [122]:
def kmConverter(col):
    """Return a mileage string such as "366 000 km" as an int.

    All space characters are removed, the last two characters (the "km"
    suffix in this data) are dropped, and the remainder is parsed as an integer.
    """
    without_spaces = "".join(col.split(" "))
    number_part = without_spaces[:-2]
    return int(number_part)

# Replace each textual mileage with its integer number of kilometres.
df["Mileage"] = df["Mileage"].map(kmConverter)

In [123]:
def aznConverter(col, usd_rate=1.7):
    """Convert a price string such as "12500 AZN" or "31500 $" to an amount in AZN.

    The leading run of digit-only tokens is the amount and the first token
    after it is the currency marker. Joining the digit tokens means an amount
    written with space-separated thousands ("12 500 AZN") is read as 12500
    rather than being cut down to its first group.

    Parameters
    ----------
    col : str
        Price text of the form "<amount> <currency>".
    usd_rate : float, default 1.7
        Number of AZN per one US dollar, applied when the currency is "$".

    Returns
    -------
    int or float
        The integer amount unchanged for any currency marker other than "$";
        the amount multiplied by ``usd_rate`` (a float) for "$".

    Raises
    ------
    ValueError
        If the text has no leading integer amount or no currency token.
    """
    amount_tokens = []
    currency = None
    for token in col.split():
        if token.isdecimal():
            amount_tokens.append(token)
        else:
            # The first non-numeric token is the currency; ignore anything after it.
            currency = token
            break

    if not amount_tokens or currency is None:
        raise ValueError(f"expected '<amount> <currency>', got {col!r}")

    amount = int("".join(amount_tokens))
    if currency == "$":
        return amount * usd_rate
    # Every other marker is treated as already being in AZN.
    return amount

# Replace each textual price with its numeric value expressed in AZN.
df["Price"] = df["Price"].map(aznConverter)

In [124]:
# Preview the working frame to check the current column values and dtypes.
df.head()

Unnamed: 0,Year,Volume,Mileage,Price
0,1999,2.0,366000,12500.0
1,2014,1.6,102000,53550.0
2,2002,2.6,469700,11700.0
3,1998,1.8,556680,9700.0
4,2000,2.2,300000,12700.0


In [125]:
# Features are every column except the target; the target is the Price column.
X = df.drop(columns="Price")
y = df["Price"]

In [126]:
# Hold out 10% of the rows for evaluation; the fixed seed makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.1,
    random_state=42,
)

In [127]:
class MyLinearRegression:
    """Linear regression trained with batch gradient descent.

    Features and target are standardised with the training mean and sample
    standard deviation (ddof=1) before optimisation; predictions are mapped
    back to the original target scale. Only the statistics needed for that
    mapping are kept, not copies of the training data.
    """

    def __init__(self):
        self.__w = None        # weights in standardised space; index 0 is the bias term
        self.__x_mean = None   # per-feature training mean
        self.__x_std = None    # per-feature training std (1.0 where the feature is constant)
        self.__y_mean = None   # training target mean
        self.__y_std = None    # training target std (1.0 where the target is constant)
        self.__columns = None  # training column labels when X was a DataFrame

    def fit(self, X, y, lr=1e-2, iterations=5000):
        """Fit the model and return the learned weight vector.

        Parameters
        ----------
        X : DataFrame or 2-D array-like, shape (rows, features)
        y : Series or array-like with one value per row of X
        lr : float, default 1e-2
            Gradient-descent step size.
        iterations : int, default 5000
            Number of full-batch update steps.

        Returns
        -------
        numpy.ndarray
            Weights in standardised space: bias first, then one per feature.

        Raises
        ------
        ValueError
            If X is not two-dimensional, is empty, or its row count differs
            from the length of y.
        """
        columns = list(X.columns) if hasattr(X, "columns") else None
        features = self.__asMatrix(X)
        # Flatten so a column-shaped target cannot broadcast the residual to (n, n).
        target = np.asarray(y, dtype=float).ravel()

        if features.shape[0] == 0:
            raise ValueError("cannot fit on an empty data set")
        if features.shape[0] != target.shape[0]:
            raise ValueError("X and y must have the same number of rows")

        self.__columns = columns
        self.__x_mean = features.mean(axis=0)
        self.__x_std = self.__safeStd(features)
        self.__y_mean = target.mean()
        self.__y_std = float(self.__safeStd(target))

        scaled_X, outputs = self.__normalize(features, target)
        # Prepend a column of ones so the bias is learned as weight 0.
        inputs = np.hstack((np.ones((scaled_X.shape[0], 1)), scaled_X))

        self.__w = self.__getWeights(inputs.shape[1])

        for _ in range(iterations):
            diff = inputs.dot(self.__w) - outputs
            gradient = inputs.T.dot(diff) / len(outputs)
            self.__w = self.__w - lr * gradient

        return self.__w

    def intercept(self):
        """Return the bias weight (expressed in standardised space)."""
        self.__checkFitted()
        return self.__w[0]

    def coefs(self):
        """Return the feature weights (expressed in standardised space)."""
        self.__checkFitted()
        return self.__w[1:]

    def __checkFitted(self):
        """Raise a clear error when the model is used before fit()."""
        if self.__w is None:
            raise RuntimeError("MyLinearRegression is not fitted yet; call fit() first")

    @staticmethod
    def __asMatrix(X):
        """Convert X to a 2-D float array, rejecting any other dimensionality."""
        matrix = np.asarray(X, dtype=float)
        if matrix.ndim != 2:
            raise ValueError("X must be two-dimensional (rows x features)")
        return matrix

    @staticmethod
    def __safeStd(values):
        """Sample std along axis 0, with 1.0 wherever it would be zero or undefined."""
        if values.shape[0] < 2:
            # ddof=1 is undefined for a single row; fall back to no scaling.
            return np.ones(values.shape[1:])
        std = values.std(axis=0, ddof=1)
        # A constant column has std 0; dividing by 1.0 keeps it at exactly zero
        # after centring instead of producing NaN.
        return np.where(std > 0, std, 1.0)

    def __normalize(self, X, y=None):
        """Standardise X (and y, when given) with the stored training statistics."""
        X = (X - self.__x_mean) / self.__x_std
        if y is not None:
            y = (y - self.__y_mean) / self.__y_std
            return X, y
        return X

    def __denormalize(self, y):
        """Map standardised target values back to the original target scale."""
        return y * self.__y_std + self.__y_mean

    def __getWeights(self, n):
        """Return the initial weight vector: n zeros."""
        return np.zeros(n)

    def predict(self, X):
        """Predict target values, in original units, for the rows of X.

        When both the training data and X are DataFrames, the columns of X are
        selected by the training labels, so their order in X does not matter.

        Raises
        ------
        RuntimeError
            If called before fit().
        KeyError
            If X is a DataFrame missing one of the training columns.
        ValueError
            If X is not two-dimensional or has the wrong number of features.
        """
        self.__checkFitted()
        if self.__columns is not None and hasattr(X, "columns"):
            X = X[self.__columns]

        features = self.__asMatrix(X)
        if features.shape[1] != self.__x_mean.shape[0]:
            raise ValueError(
                "X has %d features, but the model was fitted with %d"
                % (features.shape[1], self.__x_mean.shape[0])
            )

        scaled_X = self.__normalize(features)
        inputs = np.hstack((np.ones((scaled_X.shape[0], 1)), scaled_X))
        predictions = inputs.dot(self.__w)

        return self.__denormalize(predictions)
        

In [128]:
# Train the hand-written gradient-descent model on the training split.
myModel = MyLinearRegression()

# fit() returns the learned weights; the return value is not used in this cell.
myModel.fit(X_train, y_train)

# Predict for the held-out rows; these values are compared with the true prices below.
predictions = myModel.predict(X_test)

In [129]:
# Build a comparison table: test features, true price, our prediction rounded
# to one decimal, and the absolute error between the two.
df1 = X_test.copy()
df1["price"] = y_test
df1["My Predictions"] = np.round(predictions, 1)
df1["My Error"] = (df1["price"] - df1["My Predictions"]).abs()
df1.head()

Unnamed: 0,Year,Volume,Mileage,price,My Predictions,My Error
1201,1998,1.8,360395,11500.0,12698.6,1198.6
115,2001,2.6,230000,12400.0,14312.7,1912.7
979,2000,3.2,241000,10500.0,7929.3,2570.7
175,1995,2.0,333283,8000.0,5375.0,2625.0
63,1999,2.0,366000,12500.0,13284.6,784.6


In [130]:
# Reference model: scikit-learn's LinearRegression fitted on the same training split.
model = LinearRegression().fit(X_train, y_train)
model_preds = model.predict(X_test)

In [131]:
# Add the reference model's rounded predictions and their absolute error.
df1["Model Predictions"] = np.round(model_preds, 1)
df1["Model Error"] = (df1["Model Predictions"] - df1["price"]).abs()

In [132]:
# Show the first rows of the comparison table.
df1.head()

Unnamed: 0,Year,Volume,Mileage,price,My Predictions,My Error,Model Predictions,Model Error
1201,1998,1.8,360395,11500.0,12698.6,1198.6,12698.6,1198.6
115,2001,2.6,230000,12400.0,14312.7,1912.7,14312.7,1912.7
979,2000,3.2,241000,10500.0,7929.3,2570.7,7929.3,2570.7
175,1995,2.0,333283,8000.0,5375.0,2625.0,5375.0,2625.0
63,1999,2.0,366000,12500.0,13284.6,784.6,13284.6,784.6
