# **CLASSIFICATION**

In [1]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split

In [2]:
#For more information about the dataset visit: https://archive.ics.uci.edu/dataset/451/breast+cancer+coimbra
cancer_dataset = pd.read_csv(r'dataR2.csv')

In [3]:
cancer_dataset.head()

Unnamed: 0,Age,BMI,Glucose,Insulin,HOMA,Leptin,Adiponectin,Resistin,MCP.1,Classification
0,48,23.5,70,2.707,0.467409,8.8071,9.7024,7.99585,417.114,1
1,83,20.690495,92,3.115,0.706897,8.8438,5.429285,4.06405,468.786,1
2,82,23.12467,91,4.498,1.009651,17.9393,22.43204,9.27715,554.697,1
3,68,21.367521,77,3.226,0.612725,9.8827,7.16956,12.766,928.22,1
4,86,21.111111,92,3.549,0.805386,6.6994,4.81924,10.57635,773.92,1


In [4]:
cancer_dataset.tail()

Unnamed: 0,Age,BMI,Glucose,Insulin,HOMA,Leptin,Adiponectin,Resistin,MCP.1,Classification
111,45,26.85,92,3.33,0.755688,54.68,12.1,10.96,268.23,2
112,62,26.84,100,4.53,1.1174,12.45,21.42,7.32,330.16,2
113,65,32.05,97,5.73,1.370998,61.48,22.54,10.33,314.05,2
114,72,25.59,82,2.82,0.570392,24.96,33.75,3.27,392.46,2
115,86,27.18,138,19.91,6.777364,90.28,14.11,4.35,90.09,2


Linear Regression

In [5]:
class LogitRegression: 
    def __init__(self, lr, itr):         
        self.lr = lr         
        self.itr = itr 

    def fit(self, X, Y):         
        self.m, self.n = X.shape         
        self.W = np.zeros(self.n)         
        self.b = 0        
        self.X = X         
        self.Y = Y 
          
        for i in range(self.itr):             
            self.update()             
        return self
      
    def update(self):            
        A = 1 / (1 + np.exp(-(self.X.dot(self.W) + self.b))) 
        tmp = (A - self.Y.T)         
        tmp = np.reshape(tmp, self.m)         
        dW = np.dot(self.X.T, tmp) / self.m          
        db = np.sum(tmp) / self.m  
          
        self.W = self.W - self.lr * dW     
        self.b = self.b - self.lr * db 
          
        return self
      
    def predict(self, X):     
        Z = 1 / (1 + np.exp(-(X.dot(self.W) + self.b)))         
        Y = np.where(Z > 0.5, 1, 0)         
        return Y 

    def accuracy(self, y_test, y_pred):
        correctly_classified = np.sum(y_test == y_pred)
        accuracy = correctly_classified / len(y_test)
        return accuracy

def main(): 
    # Importing dataset     
    df = cancer_dataset 
    X = df.iloc[:,:-1].values 
    Y = df.iloc[:,-1].values
      
    # Splitting dataset into train and test set 
    X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.2, random_state=0) 
      
    model = LogitRegression(lr=0.001, itr=5000) 
    model.fit(X_train, Y_train)     
      
      
    Y_pred = model.predict(X_test)     

    score = model.accuracy(Y_test, Y_pred)
    
    print("Score:", score)
      
  
if __name__ == "__main__":      
    main()


Score: 0.4583333333333333


SVM(Support Vector Machines)

In [7]:
#Write Code for SVM here
from sklearn.svm import SVC

x = cancer_dataset.iloc[:, :-1]
y = cancer_dataset.iloc[:,-1]

x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=42)

clf = SVC(kernel='linear', C=3)
clf.fit(x_train, y_train)

print(clf.score(x_test, y_test))

0.8333333333333334




---



# **LINEAR REGRESSION**

In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt

print(pd.__version__)
print(np.__version__)


In [None]:
real_estate_dataset = pd.read_excel(r'Real estate valuation data set.xlsx')
real_estate_dataset = real_estate_dataset.drop(columns=['No'], axis=1)
print(real_estate_dataset.head())

In [None]:

class LinearRegression:
    def __init__(self, lr, ltr):
        self.lr = lr
        self.ltr = ltr
        
    def fit(self, X, Y):
        self.m, self.n = X.shape
        self.W = np.zeros(self.n)
        self.b = 0
        self.X = X
        self.Y = Y
        
        for i in range(self.ltr):
            self.update()
            
        return self
    
    def update(self):
        Y_pred = self.predict(self.X)
        dW = - (2 * (self.X.T).dot(self.Y - Y_pred)) / self.m
        db = - 2 * np.sum(self.Y - Y_pred) / self.m
        self.W = self.W - self.lr * dW
        self.b = self.b - self.lr * db
        return self
    
    def predict(self, X):
        return X.dot(self.W) + self.b


    def score(self, y_test, y_pred):
        y_mean = np.mean(y_test)
        sqtotal = np.sum((y_test - y_mean) ** 2)
        sqresidual = np.sum((y_test - y_pred) ** 2)
        score = 1 - (sqresidual / sqtotal)
        return score

def main():
    real_estate_dataset = pd.read_excel(r'Real estate valuation data set.xlsx')
    real_estate_dataset = real_estate_dataset.drop(columns=['No'], axis=1)
    print(real_estate_dataset.head())
    
    X = real_estate_dataset.iloc[:, :-1].values
    Y = real_estate_dataset.iloc[:, -1].values
    
    X = (X - X.mean(axis=0)) / X.std(axis=0)
    
    X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.2, random_state=0)
    
    model = LinearRegression(ltr=1000, lr=0.005)
    model.fit(X_train, Y_train)
    
    Y_pred = model.predict(X_test)
    
    score = model.score(Y_test, Y_pred)
    
    print("Score:", score)
    
if __name__ == "__main__":
    main()
