In [71]:
import numpy as np
import pandas as pd
import statsmodels.api as sm
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split as tts
from sklearn.metrics import accuracy_score, recall_score, precision_score
from sklearn.utils import shuffle

In [72]:
df_train = pd.read_csv('glass.csv')

In [73]:
df_train

Unnamed: 0,RI,Na,Mg,Al,Si,K,Ca,Ba,Fe,Type
0,1.52101,13.64,4.49,1.10,71.78,0.06,8.75,0.00,0.0,1
1,1.51761,13.89,3.60,1.36,72.73,0.48,7.83,0.00,0.0,1
2,1.51618,13.53,3.55,1.54,72.99,0.39,7.78,0.00,0.0,1
3,1.51766,13.21,3.69,1.29,72.61,0.57,8.22,0.00,0.0,1
4,1.51742,13.27,3.62,1.24,73.08,0.55,8.07,0.00,0.0,1
...,...,...,...,...,...,...,...,...,...,...
209,1.51623,14.14,0.00,2.88,72.61,0.08,9.18,1.06,0.0,7
210,1.51685,14.92,0.00,1.99,73.06,0.00,8.40,1.59,0.0,7
211,1.52065,14.36,0.00,2.02,73.42,0.00,8.44,1.64,0.0,7
212,1.51651,14.38,0.00,1.94,73.61,0.00,8.48,1.57,0.0,7


In [74]:
# Summarise missing values per column: absolute count and fraction of rows.
null_mask = df_train.isnull()
total = null_mask.sum().sort_values(ascending=False)
percent = (null_mask.sum()/null_mask.count()).sort_values(ascending=False)
# Side-by-side table, one row per column of df_train.
missing_data = pd.concat([total, percent], axis=1, keys=["Total", "Percent"])
missing_data.head(20)

Unnamed: 0,Total,Percent
Type,0,0.0
Fe,0,0.0
Ba,0,0.0
Ca,0,0.0
K,0,0.0
Si,0,0.0
Al,0,0.0
Mg,0,0.0
Na,0,0.0
RI,0,0.0


In [75]:
print("Type: ",df_train.Type.unique())

Type:  [1 2 3 5 6 7]


In [76]:
df_train

Unnamed: 0,RI,Na,Mg,Al,Si,K,Ca,Ba,Fe,Type
0,1.52101,13.64,4.49,1.10,71.78,0.06,8.75,0.00,0.0,1
1,1.51761,13.89,3.60,1.36,72.73,0.48,7.83,0.00,0.0,1
2,1.51618,13.53,3.55,1.54,72.99,0.39,7.78,0.00,0.0,1
3,1.51766,13.21,3.69,1.29,72.61,0.57,8.22,0.00,0.0,1
4,1.51742,13.27,3.62,1.24,73.08,0.55,8.07,0.00,0.0,1
...,...,...,...,...,...,...,...,...,...,...
209,1.51623,14.14,0.00,2.88,72.61,0.08,9.18,1.06,0.0,7
210,1.51685,14.92,0.00,1.99,73.06,0.00,8.40,1.59,0.0,7
211,1.52065,14.36,0.00,2.02,73.42,0.00,8.44,1.64,0.0,7
212,1.51651,14.38,0.00,1.94,73.61,0.00,8.48,1.57,0.0,7


In [36]:
import time
import numpy as np
from itertools import combinations_with_replacement
from sympy.core import Mul
import pandas as pd
import statsmodels.api as sm
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split as tts
from sklearn.metrics import accuracy_score, recall_score, precision_score
from sklearn.utils import shuffle

In [37]:
def new_features(variables):
    """Expand one sample into products of its values taken four at a time.

    The input values plus a constant 1 form a pool; every combination with
    replacement of ``max_degree`` (4) items from that pool is multiplied with
    sympy's ``Mul``. Because the constant 1 is part of the pool, combinations
    padded with 1s also produce the lower-degree products.

    Args:
        variables: object exposing ``tolist()`` (e.g. a 1-D numpy row).

    Returns:
        numpy array holding one ``Mul(*item)`` result per combination, or
        ``np.array([1])`` when the input is empty.
    """
    variables=variables.tolist()
    max_degree = 4
    min_degree = 0
    if not variables or max_degree == 0:
        return np.array([1])
    # Append the constant term so lower-degree products are generated too.
    variables = np.array(list(variables) + [1])
    monomials_list_comm = np.empty(0)
    for item in combinations_with_replacement(variables, max_degree):
        # Count occurrences of each value in this combination. The dict is
        # keyed by value, so equal feature values share one counter and a
        # feature equal to 1 is treated like the constant term.
        powers = dict()
        for variable in variables:
            powers[variable] = 0
        for variable in item:
            if variable != 1:
                powers[variable] += 1
        # With min_degree == 0 this filter accepts every combination.
        if max(powers.values()) >= min_degree:
            # NOTE(review): np.append copies the whole array on every call.
            monomials_list_comm = np.append(monomials_list_comm,Mul(*item))
    return np.array(list(monomials_list_comm))

In [38]:
def compute_cost(W, X, Y):
    """Soft-margin SVM objective for weights ``W`` on samples ``X``/labels ``Y``.

    Returns ``0.5 * W.W`` plus the mean hinge loss scaled by the module-level
    ``regularization_strength``.
    """
    sample_count = X.shape[0]
    # Per-sample hinge term: max(0, 1 - y * (x . w)).
    slack = np.maximum(0, 1 - Y * (np.dot(X, W)))
    mean_hinge = np.sum(slack) / sample_count
    return 0.5 * np.dot(W, W) + regularization_strength * mean_hinge

In [39]:
def calculate_cost_gradient(W, X_batch, Y_batch):
    """Average (sub)gradient of the soft-margin SVM cost over a batch.

    For each sample the contribution is ``W`` when the margin constraint is
    met (``1 - y * (x . W) <= 0``) and
    ``W - regularization_strength * y * x`` otherwise; the contributions are
    averaged over the batch.

    Args:
        W: weight vector of length d.
        X_batch: (n, d) samples, or a single 1-D sample of length d.
        Y_batch: n labels, or one scalar label of any numeric type
            (np.float64, Python float, int, ...).

    Returns:
        numpy array of length d.
    """
    W = np.asarray(W, dtype=float)
    # Promote a single example to a batch of one whatever the scalar type is;
    # the former `type(Y_batch) == np.float64` check missed Python floats,
    # ints and other numpy scalar types.
    Y_batch = np.atleast_1d(np.asarray(Y_batch, dtype=float))
    X_batch = np.atleast_2d(np.asarray(X_batch, dtype=float))

    distance = 1 - (Y_batch * np.dot(X_batch, W))
    # Only samples violating the margin contribute a hinge term.
    violating = distance > 0
    hinge_term = np.dot(Y_batch[violating], X_batch[violating])

    return W - regularization_strength * hinge_term / len(Y_batch)

In [40]:
def sgd(features, outputs):
    """Train weights with per-sample stochastic gradient descent.

    Runs a fixed number of epochs (``max_epochs - 1``); there is no early
    stopping in this version. The cost on the full data is printed at epochs
    1, 2, 4, 8, ... and at the final epoch. Uses the module-level
    ``learning_rate`` and the sibling functions ``calculate_cost_gradient``
    and ``compute_cost``.

    Args:
        features: 2-D array, one row per sample.
        outputs: 1-D array of labels aligned with ``features``.

    Returns:
        Weight vector of length ``features.shape[1]``.
    """
    max_epochs = 5000
    weights = np.zeros(features.shape[1])
    nth = 0  # exponent of the next power-of-two reporting epoch
    for epoch in range(1, max_epochs):
        # shuffle to prevent repeating update cycles
        X, Y = shuffle(features, outputs)
        for ind, x in enumerate(X):
            ascent = calculate_cost_gradient(weights, x, Y[ind])
            weights = weights - (learning_rate * ascent)

        # progress report on the 2^nth epoch and on the last epoch
        if epoch == 2 ** nth or epoch == max_epochs - 1:
            cost = compute_cost(weights, features, outputs)
            print("Epoch is: {} and Cost is: {}".format(epoch, cost))
            nth += 1
    return weights

In [41]:
def init3():
    """Train the hand-written linear SVM on glass.csv and print test metrics.

    Steps: read the CSV, drop the first column, turn ``Type`` into a binary
    +1/-1 target, split 80/20 (``random_state=42``), train with ``sgd`` and
    print accuracy, recall and precision on the test split.

    No intercept column is added in this version, so predictions are
    ``sign(x . W)`` with no bias term.
    """
    print("reading dataset...")
    data = pd.read_csv('glass.csv')
    print(data.head())

    # NOTE(review): the head() printed by this notebook shows 'RI' as the
    # first column, so this drop appears to remove the refractive-index
    # feature rather than an id column -- confirm this is intended.
    data.drop(data.columns[[0]], axis=1, inplace=True)

    # Collapse the glass types into a binary target:
    # types 1-3 -> 'low' -> +1.0, types 5-7 -> 'medium'/'high' -> -1.0.
    data['Type']=data['Type'].map({1:'low',2:'low',3:'low', 5:'medium', 6:'medium', 7:'high'})
    diag_map = {'low': 1.0, 'medium': -1.0,'high':-1.0}
    data['Type'] = data['Type'].map(diag_map)

    # put features & outputs in different data frames
    Y = data.loc[:, 'Type']
    X = data.iloc[:, 0:-1]

    # split data into train and test set
    print("splitting dataset into train and test sets...")
    X_train, X_test, y_train, y_test = tts(X, Y, test_size=0.2, random_state=42)

    # train the model
    print("training started...")
    W = sgd(X_train.to_numpy(), y_train.to_numpy())
    print("training finished.")
    print("weights are: {}".format(W))
    print(W.shape)

    # testing the model: one matrix-vector product replaces the per-row loop
    print("testing the model...")
    y_test_predicted = np.sign(np.dot(X_test.to_numpy(), W))

    print("accuracy on test dataset: {}".format(accuracy_score(y_test, y_test_predicted)))
    print("recall on test dataset: {}".format(recall_score(y_test, y_test_predicted)))
    # was recall_score, which printed the recall a second time as "precision"
    print("precision on test dataset: {}".format(precision_score(y_test, y_test_predicted)))

In [42]:
# Hyperparameters read as module-level globals by compute_cost,
# calculate_cost_gradient and sgd; this cell must run before init3().
regularization_strength = 1000  # multiplier on the hinge-loss term
learning_rate = 0.001  # step size applied to each per-sample gradient in sgd

In [43]:
# Time the whole train/evaluate run. perf_counter() is a monotonic,
# high-resolution clock meant for measuring elapsed intervals.
start = time.perf_counter()
init3()
end = time.perf_counter()
print("Time taken to Evaluate: ",end - start)

reading dataset...
        RI     Na    Mg    Al     Si     K    Ca   Ba   Fe  Type
0  1.52101  13.64  4.49  1.10  71.78  0.06  8.75  0.0  0.0     1
1  1.51761  13.89  3.60  1.36  72.73  0.48  7.83  0.0  0.0     1
2  1.51618  13.53  3.55  1.54  72.99  0.39  7.78  0.0  0.0     1
3  1.51766  13.21  3.69  1.29  72.61  0.57  8.22  0.0  0.0     1
4  1.51742  13.27  3.62  1.24  73.08  0.55  8.07  0.0  0.0     1
splitting dataset into train and test sets...
training started...
Epoch is: 1 and Cost is: 433355.85874132754
Epoch is: 2 and Cost is: 743982.0331524357
Epoch is: 4 and Cost is: 348601.92269628827
Epoch is: 8 and Cost is: 60117.23370016706
Epoch is: 16 and Cost is: 931094.4424995758
Epoch is: 32 and Cost is: 62888.15009307255
Epoch is: 64 and Cost is: 257035.4333871593
Epoch is: 128 and Cost is: 840830.7037261883
Epoch is: 256 and Cost is: 692308.8447983871
Epoch is: 512 and Cost is: 82294.64631625357
Epoch is: 1024 and Cost is: 141944.56878714214
Epoch is: 2048 and Cost is: 219796.92

In [1]:
import numpy as np
import pandas as pd
import statsmodels.api as sm
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split as tts
from sklearn.metrics import accuracy_score, recall_score, precision_score
from sklearn.utils import shuffle

In [2]:
df_train = pd.read_csv('glass.csv')

In [3]:
df_train

Unnamed: 0,RI,Na,Mg,Al,Si,K,Ca,Ba,Fe,Type
0,1.52101,13.64,4.49,1.10,71.78,0.06,8.75,0.00,0.0,1
1,1.51761,13.89,3.60,1.36,72.73,0.48,7.83,0.00,0.0,1
2,1.51618,13.53,3.55,1.54,72.99,0.39,7.78,0.00,0.0,1
3,1.51766,13.21,3.69,1.29,72.61,0.57,8.22,0.00,0.0,1
4,1.51742,13.27,3.62,1.24,73.08,0.55,8.07,0.00,0.0,1
...,...,...,...,...,...,...,...,...,...,...
209,1.51623,14.14,0.00,2.88,72.61,0.08,9.18,1.06,0.0,7
210,1.51685,14.92,0.00,1.99,73.06,0.00,8.40,1.59,0.0,7
211,1.52065,14.36,0.00,2.02,73.42,0.00,8.44,1.64,0.0,7
212,1.51651,14.38,0.00,1.94,73.61,0.00,8.48,1.57,0.0,7


In [4]:
# Remove the first column of the frame, in place.
# NOTE(review): the frame displayed above has 'RI' as its first column, so
# this removes a feature rather than an id column -- confirm intent.
# NOTE(review): this cell is not idempotent; re-running it drops another
# column and re-maps the already-mapped target.
df_train.drop(columns=df_train.columns[0], inplace=True)

# Binary target: glass types 1-3 -> 'low' -> +1.0, types 5-7 -> -1.0.
glass_band_by_type = {1:'low',2:'low',3:'low', 5:'medium', 6:'medium', 7:'high'}
diag_map = {'low': 1.0, 'medium': -1.0,'high':-1.0}
df_train['Type'] = df_train['Type'].map(glass_band_by_type).map(diag_map)

In [5]:
df_train

Unnamed: 0,Na,Mg,Al,Si,K,Ca,Ba,Fe,Type
0,13.64,4.49,1.10,71.78,0.06,8.75,0.00,0.0,1.0
1,13.89,3.60,1.36,72.73,0.48,7.83,0.00,0.0,1.0
2,13.53,3.55,1.54,72.99,0.39,7.78,0.00,0.0,1.0
3,13.21,3.69,1.29,72.61,0.57,8.22,0.00,0.0,1.0
4,13.27,3.62,1.24,73.08,0.55,8.07,0.00,0.0,1.0
...,...,...,...,...,...,...,...,...,...
209,14.14,0.00,2.88,72.61,0.08,9.18,1.06,0.0,-1.0
210,14.92,0.00,1.99,73.06,0.00,8.40,1.59,0.0,-1.0
211,14.36,0.00,2.02,73.42,0.00,8.44,1.64,0.0,-1.0
212,14.38,0.00,1.94,73.61,0.00,8.48,1.57,0.0,-1.0


In [6]:
df_lable1 = df_train["Type"]

In [7]:
df_lable1

0      1.0
1      1.0
2      1.0
3      1.0
4      1.0
      ... 
209   -1.0
210   -1.0
211   -1.0
212   -1.0
213   -1.0
Name: Type, Length: 214, dtype: float64

In [8]:
df_train.drop('Type',axis=1,inplace=True)

In [9]:
df_train

Unnamed: 0,Na,Mg,Al,Si,K,Ca,Ba,Fe
0,13.64,4.49,1.10,71.78,0.06,8.75,0.00,0.0
1,13.89,3.60,1.36,72.73,0.48,7.83,0.00,0.0
2,13.53,3.55,1.54,72.99,0.39,7.78,0.00,0.0
3,13.21,3.69,1.29,72.61,0.57,8.22,0.00,0.0
4,13.27,3.62,1.24,73.08,0.55,8.07,0.00,0.0
...,...,...,...,...,...,...,...,...
209,14.14,0.00,2.88,72.61,0.08,9.18,1.06,0.0
210,14.92,0.00,1.99,73.06,0.00,8.40,1.59,0.0
211,14.36,0.00,2.02,73.42,0.00,8.44,1.64,0.0
212,14.38,0.00,1.94,73.61,0.00,8.48,1.57,0.0


In [19]:
import time
from sklearn.model_selection import train_test_split 

In [20]:
from sklearn.svm import SVC
# Hold out half of the rows for evaluation. The seed is fixed (same value as
# the tts(...) calls elsewhere in this notebook) so the split, and therefore
# the reported score, is reproducible across kernel restarts.
X_train, X_test, y_train, y_test = train_test_split(df_train,df_lable1,test_size = 0.5, random_state=42)

In [21]:
# Fit an RBF-kernel support vector classifier on the training split.
# fit() is the cell's last expression, so its return value is what the
# notebook renders as the cell output.
model = SVC(kernel = 'rbf')
model.fit(X_train, y_train)

SVC(C=1.0, break_ties=False, cache_size=200, class_weight=None, coef0=0.0,
    decision_function_shape='ovr', degree=3, gamma='scale', kernel='rbf',
    max_iter=-1, probability=False, random_state=None, shrinking=True,
    tol=0.001, verbose=False)

In [22]:
# Time scoring on the held-out split. perf_counter() is a monotonic,
# high-resolution clock, which matters for millisecond-scale intervals.
start = time.perf_counter()

score = model.score(X_test,y_test)
print(score)
end = time.perf_counter()
print("Time taken to Evaluate: ",end - start)

0.6635514018691588
Time taken to Evaluate:  0.0039899349212646484


In [23]:
import time
import numpy as np
import pandas as pd
import statsmodels.api as sm
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split as tts
from sklearn.metrics import accuracy_score, recall_score, precision_score
from sklearn.utils import shuffle

In [24]:
def lvec(a):
    """Flatten the pairwise products ``a[i] * a[j]`` for ``j >= i``.

    For each index ``i`` the tail ``a[i:]`` is multiplied by ``a[i]`` and the
    first entry (the ``a[i] * a[i]`` term) is halved; the pieces are joined
    in order of ``i`` into one flat array.
    """
    pieces = [np.empty(0)]
    for start in range(a.shape[0]):
        scaled_tail = a[start] * a[start:]
        # Halve the squared term; the assignment keeps scaled_tail's dtype.
        scaled_tail[0] = scaled_tail[0] / 2
        pieces.append(np.ravel(scaled_tail))
    return np.concatenate(pieces)

In [25]:
def create_z(x):
    """Return ``lvec(x)`` followed by ``x`` and a trailing constant 1, flattened."""
    quadratic_terms = lvec(x)
    return np.concatenate(
        (np.empty(0), np.ravel(quadratic_terms), np.ravel(x), np.ravel(1))
    )

In [26]:
def create_r(x):
    """Return ``lvec(create_z(x))`` followed by ``lvec(x)`` as one flat array."""
    augmented = create_z(x)
    augmented_products = lvec(augmented)
    input_products = lvec(x)
    return np.concatenate(
        (np.empty(0), np.ravel(augmented_products), np.ravel(input_products))
    )

In [27]:
def new_feature(x):
    """Return ``create_r(x)`` followed by ``x`` and a trailing constant 1, flattened."""
    higher_order_terms = create_r(x)
    return np.concatenate(
        (np.empty(0), np.ravel(higher_order_terms), np.ravel(x), np.ravel(1))
    )

In [28]:
def remove_correlated_features(X):
    """Drop, in place, every column correlated >= 0.9 with an earlier column.

    Only the signed correlation is compared with the threshold, so strongly
    negative correlations are not treated as redundant.

    Returns:
        The labels of the dropped columns (a pandas Index).
    """
    corr_threshold = 0.9
    corr_values = X.corr().to_numpy()
    # Keep only pairs (i, j) with j > i, i.e. the strict upper triangle.
    above_threshold = np.triu(corr_values >= corr_threshold, k=1)
    # Column j goes if any earlier column i is highly correlated with it.
    drop_columns = above_threshold.any(axis=0)
    columns_dropped = X.columns[drop_columns]
    X.drop(columns_dropped, axis=1, inplace=True)
    return columns_dropped


def remove_less_significant_features(X, Y):
    """Backward elimination: drop, in place, columns with OLS p-value > 0.05.

    An OLS model ``Y ~ X`` is refitted each round and the column with the
    largest p-value is removed, until the largest p-value no longer exceeds
    the significance level or every initial column has been tried.

    Args:
        X: feature DataFrame; modified in place.
        Y: target aligned with ``X``.

    Returns:
        numpy array with the dropped column labels, in removal order
        (empty when nothing is dropped, including when ``X`` has no columns).
    """
    sl = 0.05
    dropped = []
    # With zero columns the loop body never runs; the previous version then
    # called .summary() on None. That summary result was discarded anyway,
    # so the call has been removed.
    for _ in range(len(X.columns)):
        pvalues = sm.OLS(Y, X).fit().pvalues
        max_col = pvalues.idxmax()
        max_val = pvalues.max()
        # `not (>)` rather than `<=` so a NaN p-value still stops the loop.
        if not (max_val > sl):
            break
        X.drop(max_col, axis='columns', inplace=True)
        dropped.append(max_col)
    return np.array(dropped)

In [29]:
def compute_cost(W, X, Y):
    """Soft-margin SVM objective for weights ``W`` on samples ``X``/labels ``Y``.

    Returns ``0.5 * W.W`` plus the mean hinge loss scaled by the module-level
    ``regularization_strength``.
    """
    sample_count = X.shape[0]
    # Per-sample hinge term: max(0, 1 - y * (x . w)).
    slack = np.maximum(0, 1 - Y * (np.dot(X, W)))
    mean_hinge = np.sum(slack) / sample_count
    return 0.5 * np.dot(W, W) + regularization_strength * mean_hinge

In [30]:
def calculate_cost_gradient(W, X_batch, Y_batch):
    """Average (sub)gradient of the soft-margin SVM cost over a batch.

    For each sample the contribution is ``W`` when the margin constraint is
    met (``1 - y * (x . W) <= 0``) and
    ``W - regularization_strength * y * x`` otherwise; the contributions are
    averaged over the batch.

    Args:
        W: weight vector of length d.
        X_batch: (n, d) samples, or a single 1-D sample of length d.
        Y_batch: n labels, or one scalar label of any numeric type
            (np.float64, Python float, int, ...).

    Returns:
        numpy array of length d.
    """
    W = np.asarray(W, dtype=float)
    # Promote a single example to a batch of one whatever the scalar type is;
    # the former `type(Y_batch) == np.float64` check missed Python floats,
    # ints and other numpy scalar types.
    Y_batch = np.atleast_1d(np.asarray(Y_batch, dtype=float))
    X_batch = np.atleast_2d(np.asarray(X_batch, dtype=float))

    distance = 1 - (Y_batch * np.dot(X_batch, W))
    # Only samples violating the margin contribute a hinge term.
    violating = distance > 0
    hinge_term = np.dot(Y_batch[violating], X_batch[violating])

    return W - regularization_strength * hinge_term / len(Y_batch)

In [31]:
def sgd(features, outputs):
    """Train weights with per-sample stochastic gradient descent.

    The cost on the full data is printed at epochs 1, 2, 4, 8, ... and at
    the last epoch; training stops early when the cost changed by less than
    1% of the previously reported cost. Uses the module-level
    ``learning_rate`` and the sibling functions ``calculate_cost_gradient``
    and ``compute_cost``.

    Returns:
        Weight vector of length ``features.shape[1]``.
    """
    max_epochs = 5000
    cost_threshold = 0.01  # relative change (fraction of the previous cost)
    weights = np.zeros(features.shape[1])
    checkpoint_exponent = 0
    last_cost = float("inf")

    for epoch in range(1, max_epochs):
        # Fresh sample order each epoch to avoid repeating update cycles.
        X, Y = shuffle(features, outputs)
        for position, sample in enumerate(X):
            gradient = calculate_cost_gradient(weights, sample, Y[position])
            weights = weights - (learning_rate * gradient)

        # Only power-of-two epochs and the final epoch are checkpoints.
        if epoch != 2 ** checkpoint_exponent and epoch != max_epochs - 1:
            continue

        cost = compute_cost(weights, features, outputs)
        print("Epoch is: {} and Cost is: {}".format(epoch, cost))
        # Converged: cost moved by less than the threshold since last check.
        if abs(last_cost - cost) < cost_threshold * last_cost:
            break
        last_cost = cost
        checkpoint_exponent += 1
    return weights

In [32]:
def init3():
    """Train the hand-written linear SVM on glass.csv and print test metrics.

    Steps: read the CSV, drop the first column, turn ``Type`` into a binary
    +1/-1 target, append a constant intercept column, split 80/20
    (``random_state=42``), train with ``sgd`` and print accuracy, recall and
    precision on the test split.
    """
    print("reading dataset...")
    data = pd.read_csv('glass.csv')
    print(data.head())

    # NOTE(review): the head() printed by this notebook shows 'RI' as the
    # first column, so this drop appears to remove the refractive-index
    # feature rather than an id column -- confirm this is intended.
    data.drop(data.columns[[0]], axis=1, inplace=True)

    # Collapse the glass types into a binary target:
    # types 1-3 -> 'low' -> +1.0, types 5-7 -> 'medium'/'high' -> -1.0.
    data['Type']=data['Type'].map({1:'low',2:'low',3:'low', 5:'medium', 6:'medium', 7:'high'})
    diag_map = {'low': 1.0, 'medium': -1.0,'high':-1.0}
    data['Type'] = data['Type'].map(diag_map)

    # put features & outputs in different data frames; copy so the intercept
    # column is added to an independent frame, not to a slice of `data`
    Y = data.loc[:, 'Type']
    X = data.iloc[:, 0:-1].copy()

    # insert 1 in every row so the last weight acts as the intercept b
    X.insert(loc=len(X.columns), column='intercept', value=1)

    # split data into train and test set
    print("splitting dataset into train and test sets...")
    X_train, X_test, y_train, y_test = tts(X, Y, test_size=0.2, random_state=42)

    # train the model
    print("training started...")
    W = sgd(X_train.to_numpy(), y_train.to_numpy())
    print("training finished.")
    print("weights are: {}".format(W))
    print(W.shape)

    # testing the model: one matrix-vector product replaces the per-row loop
    print("testing the model...")
    y_test_predicted = np.sign(np.dot(X_test.to_numpy(), W))

    print("accuracy on test dataset: {}".format(accuracy_score(y_test, y_test_predicted)))
    print("recall on test dataset: {}".format(recall_score(y_test, y_test_predicted)))
    # was recall_score, which printed the recall a second time as "precision"
    print("precision on test dataset: {}".format(precision_score(y_test, y_test_predicted)))

In [33]:
# Hyperparameters read as module-level globals by compute_cost,
# calculate_cost_gradient and sgd; this cell must run before init3().
regularization_strength = 10000  # multiplier on the hinge-loss term
learning_rate = 0.000001  # step size applied to each per-sample gradient in sgd

In [34]:
# Time the whole train/evaluate run. perf_counter() is a monotonic,
# high-resolution clock meant for measuring elapsed intervals.
start = time.perf_counter()

init3()

end = time.perf_counter()
print("Time taken to Evaluate: ",end - start)

reading dataset...
        RI     Na    Mg    Al     Si     K    Ca   Ba   Fe  Type
0  1.52101  13.64  4.49  1.10  71.78  0.06  8.75  0.0  0.0     1
1  1.51761  13.89  3.60  1.36  72.73  0.48  7.83  0.0  0.0     1
2  1.51618  13.53  3.55  1.54  72.99  0.39  7.78  0.0  0.0     1
3  1.51766  13.21  3.69  1.29  72.61  0.57  8.22  0.0  0.0     1
4  1.51742  13.27  3.62  1.24  73.08  0.55  8.07  0.0  0.0     1
splitting dataset into train and test sets...
training started...
Epoch is: 1 and Cost is: 102825.1295010567
Epoch is: 2 and Cost is: 88047.28981434723
Epoch is: 4 and Cost is: 58877.758513027606
Epoch is: 8 and Cost is: 10651.62744096841
Epoch is: 16 and Cost is: 49910.05536844987
Epoch is: 32 and Cost is: 15084.761558395401
Epoch is: 64 and Cost is: 12623.427849848227
Epoch is: 128 and Cost is: 13560.293442045426
Epoch is: 256 and Cost is: 21004.031472612493
Epoch is: 512 and Cost is: 20823.971694641474
training finished.
weights are: [ -8.01901245  27.92260973 -17.03162323   0.1959

In [1]:
import time
import numpy as np
from itertools import combinations_with_replacement
import pandas as pd
from sklearn.svm import SVR
from sklearn.svm import LinearSVR as LSVR
from sklearn.preprocessing import MinMaxScaler

In [2]:
df_train = pd.read_csv('glass.csv')

In [3]:
df_train

Unnamed: 0,RI,Na,Mg,Al,Si,K,Ca,Ba,Fe,Type
0,1.52101,13.64,4.49,1.10,71.78,0.06,8.75,0.00,0.0,1
1,1.51761,13.89,3.60,1.36,72.73,0.48,7.83,0.00,0.0,1
2,1.51618,13.53,3.55,1.54,72.99,0.39,7.78,0.00,0.0,1
3,1.51766,13.21,3.69,1.29,72.61,0.57,8.22,0.00,0.0,1
4,1.51742,13.27,3.62,1.24,73.08,0.55,8.07,0.00,0.0,1
...,...,...,...,...,...,...,...,...,...,...
209,1.51623,14.14,0.00,2.88,72.61,0.08,9.18,1.06,0.0,7
210,1.51685,14.92,0.00,1.99,73.06,0.00,8.40,1.59,0.0,7
211,1.52065,14.36,0.00,2.02,73.42,0.00,8.44,1.64,0.0,7
212,1.51651,14.38,0.00,1.94,73.61,0.00,8.48,1.57,0.0,7


In [4]:
def Multiply(a):
    """Return the product of the items of ``a``; an empty iterable gives 1."""
    product = 1
    for factor in a:
        product *= factor
    return product

In [5]:
def Create_features(variables, max_degree=4):
    """Expand one sample into all monomials of degree 1..``max_degree``.

    For each degree, every combination with replacement of the input values
    is multiplied together with ``Multiply``. No constant term is included.

    Args:
        variables: iterable of numeric feature values for one sample.
        max_degree: highest monomial degree to generate. Defaults to 4, the
            value that used to be hard-coded.

    Returns:
        Float array of shape (n_monomials, 1), ordered by degree and then by
        combination order.
    """
    products = [
        Multiply(combo)
        for degree in range(1, max_degree + 1)
        for combo in combinations_with_replacement(variables, degree)
    ]
    return np.array(products, dtype=float).reshape((len(products), 1))

In [11]:
# Min-max scale the selected columns of df_train in place, then preview.
# NOTE(review): col[:-2] leaves the LAST TWO columns unscaled. If only the
# final column should be excluded, the slice should be col[:-1] -- confirm
# against the frame's columns at this point. The execution count jumps from
# 5 to 11 here, so the frame may have been changed by cells not shown.
col = df_train.columns
df_train[col[:-2]] = MinMaxScaler().fit_transform(df_train[col[:-2]])
df_train.head()

Unnamed: 0,RI,Na,Mg,Al,Si,K,Ca,Ba,Fe
0,0.432836,0.437594,1.0,0.252336,0.351786,0.009662,0.30855,0.0,0.0
1,0.283582,0.475188,0.801782,0.333333,0.521429,0.077295,0.223048,0.0,0.0
2,0.220808,0.421053,0.790646,0.389408,0.567857,0.062802,0.218401,0.0,0.0
3,0.285777,0.372932,0.821826,0.311526,0.5,0.091787,0.259294,0.0,0.0
4,0.275241,0.381955,0.806236,0.29595,0.583929,0.088567,0.245353,0.0,0.0


In [12]:
# Expand every row into its monomial features (Create_features) and fit a
# linear SVR on the expanded matrix.
# NOTE(review): Y is not assigned in any cell shown in this notebook, so this
# cell relies on hidden kernel state and fails on Restart & Run All.
# NOTE(review): the score is computed on the rows used for fitting.
start = time.perf_counter()  # monotonic clock for interval timing
X = df_train.to_numpy()
Xnew = []
for i in range(X.shape[0]):
    if (i%500==0):
        print("Row Count: ",i)
    Xnew.append(Create_features(X[i]))
Xnew = np.array(Xnew)
# Create_features returns (n_features, 1) columns; drop that trailing axis.
X = np.reshape(Xnew,Xnew.shape[0:2])
regr = LSVR(max_iter=50000).fit(X,Y)
print("Coefficients are: ",regr.coef_)
print("Intercept is: ",regr.intercept_)
print("Score is: ",regr.score(X,Y))
end = time.perf_counter()
print("Time taken to Evaluate: ",end-start)

Row Count:  0
Coefficients are:  [-1.32616306e-01  1.04415384e+00 -5.70536326e-01  1.37261650e+00
  4.71435417e-01  7.59345863e-02  4.81260553e-01  4.50440518e-01
 -7.87815786e-02 -2.04622317e-01  1.79399440e-01 -4.15725795e-01
  3.84033006e-01 -2.27684119e-01 -4.96877964e-02 -9.53276132e-02
 -9.21701277e-02  1.45815101e-01  5.84731078e-01  4.04657988e-02
  6.70496146e-01  3.78979680e-01 -1.34297065e-02  4.99045094e-01
  1.44125104e-01  5.39617762e-02 -4.59705079e-01 -7.87643846e-02
 -7.25942411e-01 -1.50384619e-01 -2.46511294e-01  2.24085352e-01
  2.00090646e-01  6.49456211e-01  6.51479221e-01  4.22380539e-02
  7.25565587e-01  6.42478279e-02 -1.79339269e-01  3.58712327e-01
  8.56808433e-02  1.42090664e-01  2.69770251e-01 -2.11963870e-01
  1.74198613e-01 -3.88922814e-02  2.11620928e-01 -2.39387057e-02
  1.13865360e-01  5.03808324e-02 -6.83146496e-02  4.59451451e-01
  1.11598516e-01  2.68497892e-01 -1.08698262e-01  2.20342172e-02
 -4.81886178e-02  1.31034047e-01 -2.47540949e-01 -8.67004

In [13]:
# Degree-4 polynomial-kernel SVR on the unexpanded features.
# NOTE(review): Y is not assigned in any cell shown in this notebook, so this
# cell relies on hidden kernel state; the score is computed on the rows used
# for fitting.
start = time.perf_counter()  # monotonic clock for interval timing
X = df_train.to_numpy()
regr = SVR(kernel='poly',degree=4,max_iter=50000).fit(X,Y)
print("Score is: ",regr.score(X,Y))
end = time.perf_counter()
print("Time taken to Evaluate: ",end - start)

Score is:  0.8295976561461429
Time taken to Evaluate:  0.017046451568603516


In [14]:
# SVR with its default kernel on the unexpanded features.
# NOTE(review): Y is not assigned in any cell shown in this notebook, so this
# cell relies on hidden kernel state; the score is computed on the rows used
# for fitting.
start = time.perf_counter()  # monotonic clock for interval timing
X = df_train.to_numpy()
regr = SVR(max_iter=50000).fit(X,Y)
print("Score is: ",regr.score(X,Y))
end = time.perf_counter()
print("Time taken to Evaluate: ",end - start)

Score is:  0.7843517203880761
Time taken to Evaluate:  0.0029916763305664062


In [None]:
import matplotlib.pyplot as plt

# Two series over the same x positions.
# NOTE(review): the series labels, axis labels and title are generic
# placeholders; y1 looks like scores from earlier cells written as
# percentages -- confirm and replace with descriptive text.
x1, y1 = [10,20,30], [78.65,82.95,78.43]
x2, y2 = [10,20,30], [40,10,30]

# Explicit figure/axes interface instead of the pyplot state machine.
fig, ax = plt.subplots()
ax.plot(x1, y1, label = "line 1")
ax.plot(x2, y2, label = "line 2")
ax.set_xlabel('x - axis')
ax.set_ylabel('y - axis')
ax.set_title('Two or more lines on same plot with suitable legends ')
ax.legend()
plt.show()