In [None]:
import pandas as pd
import numpy as np

In [None]:
from google.colab import drive
# Mount Google Drive (Colab helper imported above) at /content/drive; the CSV
# files read later in this notebook live under that mount point.
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
# Read the training data from the mounted Drive folder and preview the first rows.
df=pd.read_csv('/content/drive/MyDrive/Colab Notebooks/train.csv')
df.head()

Unnamed: 0,Price,Year,Mileage,City,State,Vin,Make,Model
0,29495,2013,73121,Oklahoma City,OK,1GNSKBE01DR127341,Chevrolet,TahoeLT
1,35969,2016,31800,Conyers,GA,1C4BJWDGXGL119960,Jeep,Wrangler
2,21477,2016,40655,Elk Grove Village,IL,2G1145S39G9149688,Chevrolet,ImpalaLTZ
3,13889,2015,41646,Marysville,WA,1G1PE5SB5F7250712,Chevrolet,Cruze2LT
4,14225,2016,33535,Toledo,OH,1N4AL3AP2GC197633,Nissan,Altima2.5


In [None]:
# Pull out the regression target first, then remove it (together with the
# 'Vin' column) from the feature frame and preview what is left.
target = df['Price']
df = df.drop(columns=['Price', 'Vin'])
df.head()

Unnamed: 0,Year,Mileage,City,State,Make,Model
0,2013,73121,Oklahoma City,OK,Chevrolet,TahoeLT
1,2016,31800,Conyers,GA,Jeep,Wrangler
2,2016,40655,Elk Grove Village,IL,Chevrolet,ImpalaLTZ
3,2015,41646,Marysville,WA,Chevrolet,Cruze2LT
4,2016,33535,Toledo,OH,Nissan,Altima2.5


In [None]:
# Integer-encode every object-dtype column: pd.factorize replaces each
# distinct value with an integer code (element [0] of its result).
cat_columns = df.select_dtypes(include=['object']).columns
df[cat_columns] = df[cat_columns].apply(lambda column: pd.factorize(column)[0])

In [None]:
from math import sqrt
# Cast to float before standardizing: standardize_dataset writes its results
# back into this array in place, and assigning a float into an integer numpy
# array silently truncates it to a whole number.
df=df.values.astype(float)
def column_means(dataset):
    """Return a list holding the arithmetic mean of each column of ``dataset``.

    ``dataset`` is a non-empty sequence of rows (list of lists or 2-D array);
    the number of columns is taken from the first row.
    """
    n_rows = float(len(dataset))
    n_cols = len(dataset[0])
    return [sum(row[col] for row in dataset) / n_rows for col in range(n_cols)]

def column_stdevs(dataset, means):
    """Return the sample standard deviation (n - 1 denominator) of each column.

    ``means`` must hold the per-column means of ``dataset``. The dataset needs
    at least two rows, otherwise the n - 1 denominator is zero.
    """
    n_cols = len(dataset[0])
    squared_sums = [
        sum(pow(row[col] - means[col], 2) for row in dataset)
        for col in range(n_cols)
    ]
    denominator = float(len(dataset) - 1)
    return [sqrt(total / denominator) for total in squared_sums]

def standardize_dataset(dataset, means, stdevs):
    """Standardize ``dataset`` in place using the supplied column statistics.

    Every entry ``row[i]`` is overwritten with ``(row[i] - means[i]) / stdevs[i]``.
    A column whose standard deviation is zero (a constant column) cannot be
    scaled to unit variance; it is only centred, i.e. a scale of 1 is used
    instead of dividing by zero.

    Returns None. The results are written back into the rows, so the rows must
    be able to store floats.
    """
    for row in dataset:
        for i in range(len(row)):
            # Fall back to a neutral scale for zero-variance columns.
            scale = stdevs[i] if stdevs[i] != 0 else 1.0
            row[i] = (row[i] - means[i]) / scale
# Per-column mean and sample standard deviation of the feature matrix.
means = column_means(df)
stdevs = column_stdevs(df, means)

# Standardize the dataset; standardize_dataset overwrites df's entries in place.
standardize_dataset(df, means, stdevs)
# Keep the target as a plain numpy array (train() reshapes it later).
target=target.values

In [None]:
def loss(y, y_hat):
    """Mean squared error between the targets ``y`` and predictions ``y_hat``."""
    residual = y_hat - y
    return np.mean(residual ** 2)
def gradients(X, y, y_hat):
    """Return ``(dw, db)``, the gradients used for the weight and bias updates.

    With ``m = X.shape[0]`` samples and error ``e = y_hat - y``:
    ``dw = (1/m) * X.T @ e`` and ``db = (1/m) * sum(e)``.
    """
    n_samples = X.shape[0]
    scale = 1/n_samples
    error = y_hat - y

    grad_w = scale*np.dot(X.T, error)   # one entry per feature column
    grad_b = scale*np.sum(error)        # scalar
    return grad_w, grad_b
def x_transform(X, degrees):
    """Augment the 2-D array ``X`` with element-wise powers of its columns.

    For each exponent in ``degrees`` (in order) the block ``X**exponent`` is
    appended to the right of the existing columns. ``X`` itself is not
    modified; with no exponents it is returned unchanged.
    """
    base = X.copy()
    blocks = [X]
    blocks.extend(base**exponent for exponent in degrees)
    if len(blocks) == 1:
        return X
    return np.concatenate(blocks, axis=1)
def train(X, y, bs, degrees, epochs, lr):
    """Fit a polynomial regression with mini-batch gradient descent.

    Parameters
    ----------
    X : 2-D array of input features.
    y : array of targets; reshaped to a column vector internally.
    bs : mini-batch size.
    degrees : exponents passed to ``x_transform`` to build extra features.
    epochs : number of passes over the data.
    lr : learning rate.

    Returns
    -------
    (w, b, losses) : the weight column vector, the bias, and a list with the
    full-dataset loss recorded after every epoch.
    """
    # Expand the inputs with the polynomial feature columns.
    features = x_transform(X, degrees)
    n_samples, n_features = features.shape

    # Parameters start at zero; targets become a column vector.
    weights = np.zeros((n_features, 1))
    bias = 0
    targets = y.reshape(n_samples, 1)

    history = []
    for _ in range(epochs):
        # Batches are taken in order (no shuffling); the last may be shorter.
        for batch in range((n_samples - 1)//bs + 1):
            lo = batch*bs
            rows = slice(lo, lo + bs)
            xb = features[rows]
            yb = targets[rows]

            batch_pred = np.dot(xb, weights) + bias
            dw, db = gradients(xb, yb, batch_pred)

            # Gradient-descent step.
            weights -= lr*dw
            bias -= lr*db

        # Track the loss over the whole dataset once per epoch.
        history.append(loss(targets, np.dot(features, weights) + bias))

    return weights, bias, history
def predict(X, w, b, degrees):
    """Return model predictions for ``X``.

    ``X`` is expanded with the same ``degrees`` used in training, then mapped
    through the linear model ``expanded @ w + b``.
    """
    expanded = x_transform(X, degrees)
    linear_part = np.dot(expanded, w)
    return linear_part + b
# Fit the model with squared features added (degrees=[2]): mini-batches of
# 100 rows, 1000 epochs, learning rate 0.01. `l` holds the per-epoch losses.
w, b, l = train(df, target, bs=100, degrees=[2], epochs=1000,
                lr=0.01)

In [None]:
# Show the learned weight column vector and the bias.
print( "Trained W",w )

print( "Trained b", b )

Trained W [[ 5341.4099526 ]
 [-3446.41468626]
 [ -244.43707787]
 [ -250.38045466]
 [ -654.45997436]
 [ -506.63806855]
 [  607.24849546]
 [  151.11314027]
 [  213.84670014]
 [  233.59812232]
 [ 1950.88738492]
 [ 1335.14529289]]
Trained b 20983.14698238285


In [None]:
# Sanity check: predict the first training row and compare with its target.
# reshape(1, -1) makes the row a 1 x n_features matrix without hard-coding
# the number of feature columns.
Y_pred=predict(df[0].reshape(1,-1), w, b, [2])

print( "Predicted values ", Y_pred)

print( "Real values      ", target[0] )

Predicted values  [[24530.75669666]]
Real values       29495


In [None]:
# Read the test data from the mounted Drive folder (this rebinds `df`) and
# preview the first rows.
df=pd.read_csv('/content/drive/MyDrive/Colab Notebooks/test.csv')
df.head()

Unnamed: 0,Year,Mileage,City,State,Vin,Make,Model,CarId
0,2013,72740,Skokie,IL,1FMCU0F77DUB44225,Ford,EscapeS,0
1,2017,5355,Miami Gardens,FL,1FADP3K27HL246539,Ford,FocusSE,1
2,2016,41454,Raleigh,NC,4T1BF1FK4GU163478,Toyota,CamrySE,2
3,2017,19870,Albany,GA,1GNSCBKC0HR161613,Chevrolet,TahoeLT,3
4,2016,22576,Chesapeake,VA,1G1PG5SB2G7122567,Chevrolet,Cruze,4


In [None]:
# Keep the CarId column for the output file, then remove it (together with
# the 'Vin' column) from the feature frame and preview what is left.
result = df['CarId']
df = df.drop(columns=['CarId', 'Vin'])
df.head()

Unnamed: 0,Year,Mileage,City,State,Make,Model
0,2013,72740,Skokie,IL,Ford,EscapeS
1,2017,5355,Miami Gardens,FL,Ford,FocusSE
2,2016,41454,Raleigh,NC,Toyota,CamrySE
3,2017,19870,Albany,GA,Chevrolet,TahoeLT
4,2016,22576,Chesapeake,VA,Chevrolet,Cruze


In [None]:
#get all categorical columns
cat_columns = df.select_dtypes(['object']).columns
#convert all categorical columns to numeric
# NOTE(review): pd.factorize assigns codes from the values seen in *this*
# frame, so the codes are not guaranteed to match the ones produced when the
# training frame was encoded -- TODO: reuse the training category mapping.
df[cat_columns] = df[cat_columns].apply(lambda x: pd.factorize(x)[0])
# Cast to float: standardize_dataset writes back in place, and an integer
# array would silently truncate every standardized value.
df=df.values.astype(float)

# Standardize with the `means` / `stdevs` computed on the training data
# (still in scope) instead of recomputing them on the test set, so the model
# sees features on the same scale it was trained on.
standardize_dataset(df, means, stdevs)
Y_pred = predict( df , w, b, [2])


In [None]:
# Flatten the (n_rows, 1) prediction column into a 1-D array; -1 lets numpy
# infer the length instead of hard-coding the number of test rows.
Y_pred=Y_pred.reshape(-1)

In [None]:
# Build the output table pairing each CarId with its predicted price
# (this rebinds `df`), then preview it.
df=pd.DataFrame({'CarId':result,'Price':Y_pred})
df.head()

Unnamed: 0,CarId,Price
0,0,24530.756697
1,1,34076.942971
2,2,21925.409337
3,3,27390.089208
4,4,21441.43076


In [None]:
# Write the table to Pdf2.csv in the working directory, without the index column.
df.to_csv('Pdf2.csv',index=False)