In [None]:
import numpy as np
import pandas as pd

In [None]:
# Parse the vehicle CSV; `file` keeps the frame exactly as read from disk.
file = pd.read_csv('/content/vehicle.csv')
# Wrap the parsed frame under the name that the following cells work with.
cars_df = pd.DataFrame(data=file)
# Cell output: (rows, columns) of the loaded table.
cars_df.shape


(846, 19)

In [None]:
# Count the missing entries in each column (isna is the same check as isnull).
print(cars_df.isna().sum(axis=0))

compactness                    0
circularity                    5
distance_circularity           4
radius_ratio                   6
pr.axis_aspect_ratio           2
max.length_aspect_ratio        0
scatter_ratio                  1
elongatedness                  1
pr.axis_rectangularity         3
max.length_rectangularity      0
scaled_variance                3
scaled_variance.1              2
scaled_radius_of_gyration      2
scaled_radius_of_gyration.1    4
skewness_about                 6
skewness_about.1               1
skewness_about.2               1
hollows_ratio                  0
class                          0
dtype: int64


In [None]:
# Discard every row that has at least one missing value, then show the new size.
cars_df = cars_df.dropna(axis=0, how='any')
cars_df.shape

(813, 19)

In [None]:

# Every column except the label is rescaled.
columns_to_scale = cars_df.columns.difference(['class'])  # excluding 'class'

# Min-max normalisation, applied to all feature columns in one vectorised step:
#     x' = (x - min) / (max - min)
feature_min = cars_df[columns_to_scale].min()
feature_range = cars_df[columns_to_scale].max() - feature_min
# A constant column has max == min; dividing by 1 instead of 0 maps it to 0.0
# rather than filling it with NaN.
feature_range = feature_range.where(feature_range != 0, 1)
cars_df[columns_to_scale] = (cars_df[columns_to_scale] - feature_min) / feature_range

print(cars_df.head())
print(cars_df.shape)


   compactness  circularity  distance_circularity  radius_ratio  \
0     0.478261     0.576923              0.597222      0.323144   
1     0.391304     0.307692              0.611111      0.161572   
2     0.673913     0.653846              0.916667      0.458515   
3     0.434783     0.307692              0.583333      0.240175   
4     0.260870     0.423077              0.416667      0.441048   

   pr.axis_aspect_ratio  max.length_aspect_ratio  scatter_ratio  \
0              0.274725                 0.150943       0.326797   
1              0.109890                 0.132075       0.241830   
2              0.208791                 0.150943       0.620915   
3              0.175824                 0.132075       0.209150   
4              0.615385                 0.943396       0.241830   

   elongatedness  pr.axis_rectangularity  max.length_rectangularity  \
0       0.457143                0.250000                   0.585714   
1       0.542857                0.166667            

In [None]:

# Build the combined features in one pass; the new columns are appended in the
# order listed here, and the source columns are dropped afterwards.
cars_df = cars_df.assign(
    # Plain sum of three features (unlike the groups below, this one is not averaged)
    shape_descriptor=(
        cars_df['distance_circularity']
        + cars_df['radius_ratio']
        + cars_df['scatter_ratio']
    ),
    # Combine aspect ratio columns
    aspect_ratio=(
        cars_df['pr.axis_aspect_ratio'] + cars_df['max.length_aspect_ratio']
    ) / 2,
    # Combine rectangularity columns
    rectangularity=(
        cars_df['pr.axis_rectangularity'] + cars_df['max.length_rectangularity']
    ) / 2,
    # Combine scaled variance columns
    variance=(
        cars_df['scaled_variance'] + cars_df['scaled_variance.1']
    ) / 2,
    # Combine scaled radius of gyration columns
    radius_of_gyration=(
        cars_df['scaled_radius_of_gyration'] + cars_df['scaled_radius_of_gyration.1']
    ) / 2,
    # Combine skewness columns
    skewness=(
        cars_df['skewness_about']
        + cars_df['skewness_about.1']
        + cars_df['skewness_about.2']
    ) / 3,
).drop(
    # Original columns that have been folded into the combined features
    columns=[
        'distance_circularity', 'radius_ratio', 'scatter_ratio',
        'pr.axis_aspect_ratio', 'max.length_aspect_ratio',
        'pr.axis_rectangularity', 'max.length_rectangularity',
        'scaled_variance', 'scaled_variance.1',
        'scaled_radius_of_gyration', 'scaled_radius_of_gyration.1',
        'skewness_about', 'skewness_about.1', 'skewness_about.2',
    ]
)

# Display the DataFrame after feature engineering
print(cars_df.head())
print(cars_df.shape)

   compactness  circularity  elongatedness  hollows_ratio class  \
0     0.478261     0.576923       0.457143       0.533333   van   
1     0.391304     0.307692       0.542857       0.600000   van   
2     0.673913     0.653846       0.171429       0.500000   car   
3     0.434783     0.307692       0.571429       0.866667   van   
4     0.260870     0.423077       0.542857       0.066667   bus   

   shape_descriptor  aspect_ratio  rectangularity  variance  \
0          1.247164      0.212834        0.417857  0.237959   
1          1.014513      0.120983        0.261905  0.192793   
2          1.996097      0.179867        0.535714  0.515121   
3          1.032658      0.153950        0.261905  0.153887   
4          1.099545      0.779390        0.269048  0.376638   

   radius_of_gyration  skewness  
0            0.308217  0.343213  
1            0.239614  0.394629  
2            0.441162  0.418625  
3            0.082920  0.427765  
4            0.695796  0.270239  
(813, 11)


In [None]:
# Feature matrix: every column other than the 'class' label, as a NumPy array.
X = cars_df.loc[:, cars_df.columns != 'class'].to_numpy()
print(X.shape)


(813, 10)


In [None]:
# NOTE: everything between the triple quotes below is a single string literal,
# so none of this label-encoding code is executed by this cell. The same steps
# (unique_classes / class_to_int / Y) appear as live code further down, right
# before the one-vs-all model is created.
"""import numpy as np

# Assume 'cars_df' is your DataFrame with the 'class' column containing textual data
# Get unique classes and create a mapping from class labels to integers
unique_classes = cars_df['class'].unique()
class_to_int = {label: idx for idx, label in enumerate(unique_classes)}

# Map class labels to integers
Y = cars_df['class'].map(class_to_int).to_numpy()

# Now Y is a 1D array of integers representing class labels
print(Y)"""


[0 0 1 0 2 2 0 0 0 1 2 0 2 1 0 2 1 2 0 2 2 1 0 1 1 2 1 0 1 1 1 1 2 0 1 2 1
 0 0 1 1 0 0 2 0 1 1 1 1 2 2 0 1 0 1 0 1 1 0 2 2 1 1 0 2 1 1 1 0 1 1 2 2 2
 0 1 2 2 0 0 2 1 1 1 1 0 2 1 1 2 1 2 2 0 0 0 2 1 1 2 2 0 0 1 1 0 0 1 1 2 1
 0 0 1 0 0 2 2 0 2 1 1 1 0 1 0 0 0 1 0 1 1 2 1 1 0 2 1 0 1 2 1 1 0 2 1 0 1
 1 1 1 1 1 1 1 1 2 0 1 2 0 2 0 2 1 2 0 1 1 2 1 1 2 2 1 1 1 1 0 2 1 2 1 2 2
 2 0 1 1 1 0 0 1 2 1 1 0 2 2 1 1 1 2 0 1 0 2 1 0 1 0 1 1 0 1 2 1 0 0 2 1 2
 1 1 2 1 0 0 2 1 1 0 1 0 2 0 1 1 1 1 1 1 1 0 0 1 2 1 0 0 2 1 0 2 2 2 1 1 2
 1 2 2 1 0 2 1 1 0 0 2 0 2 2 2 1 1 2 1 1 2 0 2 2 1 1 1 1 0 2 1 2 0 0 1 0 1
 2 2 1 1 2 1 1 0 0 1 1 2 2 1 0 2 0 2 2 1 1 1 1 1 1 0 1 1 1 0 2 1 1 2 1 1 1
 1 1 1 0 1 1 0 1 2 1 2 2 2 0 2 1 1 1 1 1 2 2 1 1 0 1 0 0 1 2 0 0 2 1 1 2 2
 0 1 1 1 0 1 2 1 1 2 1 1 1 2 1 0 1 2 2 2 0 1 1 2 1 1 1 1 1 0 1 2 1 2 1 0 0
 1 1 1 1 0 0 1 2 0 1 1 2 0 2 1 1 2 1 2 0 0 1 1 0 1 1 1 2 2 2 1 0 0 1 2 1 1
 1 1 0 2 2 1 1 1 0 0 2 1 1 1 1 1 1 2 1 2 2 0 0 1 1 1 1 1 1 1 1 1 0 1 2 1 1
 1 0 1 1 1 1 0 1 0 0 1 1 

In [53]:
import numpy as np

class LogisticRegression:
    """Binary logistic-regression classifier trained with batch gradient descent.

    Predictions are ``sigmoid(X @ weights)``. There is no separate intercept
    term, so callers who want a bias must append a constant column to ``X``.
    """

    # Probabilities are clipped to [_EPS, 1 - _EPS] before taking logs so the
    # loss stays finite even when the sigmoid saturates to exactly 0 or 1.
    _EPS = 1e-15
    # np.exp overflows float64 slightly above 709; logits below this floor are
    # raised to it so exp(-z) never overflows. Values at or above the floor are
    # left untouched.
    _MIN_LOGIT = -709.0

    def __init__(self, alpha=0.01, max_iters=None, threshold=1e-5):
        """Store the hyper-parameters.

        Args:
            alpha: learning rate of the gradient-descent update.
            max_iters: number of iterations to run; ``None`` means "iterate
                until the loss changes by less than ``threshold``".
            threshold: loss-change tolerance used when ``max_iters`` is None.
        """
        self.weights = None
        self.alpha = alpha
        self.max_iters = max_iters
        self.threshold = threshold

    def sigmoid(self, z):
        """Return 1 / (1 + exp(-z)) element-wise without overflowing."""
        z = np.maximum(z, self._MIN_LOGIT)
        return 1 / (1 + np.exp(-z))  # Sigmoid function

    def loss(self, h, y):
        """Mean binary cross-entropy between probabilities ``h`` and 0/1 labels ``y``."""
        # Without the clip, h == 0 or h == 1 gives 0 * log(0) = NaN.
        h = np.clip(h, self._EPS, 1 - self._EPS)
        return -np.mean(y * np.log(h) + (1 - y) * np.log(1 - h))  # Logistic loss function

    def Train(self, X, Y, alpha=0.01, max_iters=None, print_loss_iter=100):
        """Fit the weights to ``X`` / ``Y`` starting from all zeros.

        Args:
            X: 2-D array-like of features, one row per sample.
            Y: 1-D array-like of 0/1 labels.
            alpha: learning rate for this run (stored on the instance).
            max_iters: iteration count for this run. When omitted (None) the
                value given to the constructor is used; if that is None too,
                training runs until the loss change drops below
                ``self.threshold``.
            print_loss_iter: print the loss every this many iterations;
                0 or None disables the periodic printing.
        """
        self.alpha = alpha
        # Only replace the stored iteration cap when the caller supplies one;
        # otherwise the constructor's setting stays in force.
        if max_iters is not None:
            self.max_iters = max_iters
        n_iters = self.max_iters

        X = np.asarray(X, dtype=float)
        Y = np.asarray(Y)
        m, n = X.shape
        self.weights = np.zeros(n)
        prev_loss = np.inf

        epoch = 0
        while n_iters is None or epoch < n_iters:
            z = np.dot(X, self.weights)
            h = self.sigmoid(z)
            loss = self.loss(h, Y)

            # Update weights using gradient descent
            gradient = np.dot(X.T, (h - Y)) / m
            self.weights -= self.alpha * gradient

            if print_loss_iter and epoch % print_loss_iter == 0:
                print(f"Iteration {epoch}: Loss = {loss}")

            # Convergence mode only: stop once the loss has settled. A
            # non-finite loss can never satisfy the threshold test, so it
            # also ends the loop instead of spinning forever.
            if n_iters is None and (
                not np.isfinite(loss) or abs(prev_loss - loss) < self.threshold
            ):
                break

            prev_loss = loss
            epoch += 1

    def Predict_Class(self, X):
        """Return 1 where the predicted probability is >= 0.5, else 0."""
        h = self.Predict_Confidence(X)
        return np.where(h >= 0.5, 1, 0)  # Predict class based on threshold 0.5

    def Predict_Confidence(self, X):
        """Return the predicted probability of class 1 for each row of ``X``."""
        z = np.dot(X, self.weights)
        h = self.sigmoid(z)
        return h  # Return the probabilities (confidence scores)

    def Get_Weights(self):
        """Return the current weight vector (None before Train is called)."""
        return self.weights  # Return model weights

class OneVsAllLogisticRegression:
    """Multi-class classifier built from one binary LogisticRegression per class.

    Class ``i`` gets its own model trained on the relabelled target
    ``Y == i``; prediction picks the class whose model reports the highest
    confidence. Labels are compared against ``0 .. num_classes - 1``.
    """

    def __init__(self, num_classes=4, alpha=0.01, max_iters=None, threshold=1e-5):
        """Create ``num_classes`` binary models sharing the same hyper-parameters."""
        self.num_classes = num_classes
        # Remembered so Train() can fall back to it when called without max_iters.
        self.max_iters = max_iters
        self.models = [LogisticRegression(alpha, max_iters, threshold) for _ in range(num_classes)]

    def Train(self, X, Y, alpha=0.01, max_iters=None, print_loss_iter=100):
        """Train every per-class model on ``X`` against its own 0/1 target.

        Args:
            X: feature matrix passed unchanged to each binary model.
            Y: array-like of integer class labels.
            alpha: learning rate forwarded to each binary model.
            max_iters: iteration count forwarded to each binary model. When
                omitted (None) the value given to the constructor is used.
            print_loss_iter: loss-printing interval forwarded to each model.
        """
        # Keep the constructor's iteration cap unless the caller overrides it.
        if max_iters is not None:
            self.max_iters = max_iters
        # Convert once so that `labels == i` is element-wise even for plain lists.
        labels = np.asarray(Y)

        for i in range(self.num_classes):
            print(f"Training classifier for class {i} vs all...")
            Y_binary = np.where(labels == i, 1, 0)
            self.models[i].Train(X, Y_binary, alpha, self.max_iters, print_loss_iter)

    def Predict_Class(self, X):
        """Return, for each row of ``X``, the index of the most confident model."""
        confidence_scores = np.zeros((X.shape[0], self.num_classes))
        for i in range(self.num_classes):
            confidence_scores[:, i] = self.models[i].Predict_Confidence(X)  # Get confidence for each class

        return np.argmax(confidence_scores, axis=1)

    def Get_Weights(self):
        """Return a list with the weight vector of each per-class model."""
        return [model.Get_Weights() for model in self.models]

# Encode the textual labels as integers, numbered by order of first appearance.
unique_classes = cars_df['class'].unique()
class_to_int = dict(zip(unique_classes, range(len(unique_classes))))
Y = cars_df['class'].map(class_to_int).to_numpy()  # Convert classes to integers

# Feature matrix: everything except the label column.
X = cars_df.drop(columns=['class']).to_numpy()

num_classes = len(unique_classes)

# One binary classifier per class, each trained for 1000 iterations.
ova_lr = OneVsAllLogisticRegression(
    num_classes=num_classes, alpha=0.01, max_iters=1000, threshold=1e-5
)
ova_lr.Train(X, Y, alpha=0.01, max_iters=1000, print_loss_iter=100)

# Predictions are made on the same rows the models were trained on, so the
# figure printed below is a training-set accuracy.
Y_pred = ova_lr.Predict_Class(X)
accuracy = (Y_pred == Y).mean()
print(f"Accuracy: {accuracy * 100:.2f}%")

weights = ova_lr.Get_Weights()
print("Model weights for each class:", weights)


Training classifier for class 0 vs all...
Iteration 0: Loss = 0.6931471805599452
Iteration 100: Loss = 0.5493295378566357
Iteration 200: Loss = 0.5117132637219841
Iteration 300: Loss = 0.49818302283253385
Iteration 400: Loss = 0.4916471466666432
Iteration 500: Loss = 0.48753896461019897
Iteration 600: Loss = 0.4844011513372349
Iteration 700: Loss = 0.4817123241918736
Iteration 800: Loss = 0.4792703599318234
Iteration 900: Loss = 0.4769912951610611
Training classifier for class 1 vs all...
Iteration 0: Loss = 0.6931471805599452
Iteration 100: Loss = 0.6787078635689118
Iteration 200: Loss = 0.6718101979891401
Iteration 300: Loss = 0.6667139404970499
Iteration 400: Loss = 0.6622195490231287
Iteration 500: Loss = 0.6580583131583269
Iteration 600: Loss = 0.6541577381988778
Iteration 700: Loss = 0.6504883156575503
Iteration 800: Loss = 0.6470308645135593
Iteration 900: Loss = 0.6437694312852794
Training classifier for class 2 vs all...
Iteration 0: Loss = 0.6931471805599452
Iteration 100: Lo

# Question 1


In [55]:
# Read the advertising CSV and print the parsed table.
file = pd.read_csv('/content/advertising.csv')
print(file)

        TV  Radio  Newspaper  Sales
0    230.1   37.8       69.2   22.1
1     44.5   39.3       45.1   10.4
2     17.2   45.9       69.3   12.0
3    151.5   41.3       58.5   16.5
4    180.8   10.8       58.4   17.9
..     ...    ...        ...    ...
195   38.2    3.7       13.8    7.6
196   94.2    4.9        8.1   14.0
197  177.0    9.3        6.4   14.8
198  283.6   42.0       66.2   25.5
199  232.1    8.6        8.7   18.4

[200 rows x 4 columns]


In [56]:
# Copy the table's values into a standalone NumPy array and print it.
data = file.to_numpy(copy=True)
print(data)

[[230.1  37.8  69.2  22.1]
 [ 44.5  39.3  45.1  10.4]
 [ 17.2  45.9  69.3  12. ]
 [151.5  41.3  58.5  16.5]
 [180.8  10.8  58.4  17.9]
 [  8.7  48.9  75.    7.2]
 [ 57.5  32.8  23.5  11.8]
 [120.2  19.6  11.6  13.2]
 [  8.6   2.1   1.    4.8]
 [199.8   2.6  21.2  15.6]
 [ 66.1   5.8  24.2  12.6]
 [214.7  24.    4.   17.4]
 [ 23.8  35.1  65.9   9.2]
 [ 97.5   7.6   7.2  13.7]
 [204.1  32.9  46.   19. ]
 [195.4  47.7  52.9  22.4]
 [ 67.8  36.6 114.   12.5]
 [281.4  39.6  55.8  24.4]
 [ 69.2  20.5  18.3  11.3]
 [147.3  23.9  19.1  14.6]
 [218.4  27.7  53.4  18. ]
 [237.4   5.1  23.5  17.5]
 [ 13.2  15.9  49.6   5.6]
 [228.3  16.9  26.2  20.5]
 [ 62.3  12.6  18.3   9.7]
 [262.9   3.5  19.5  17. ]
 [142.9  29.3  12.6  15. ]
 [240.1  16.7  22.9  20.9]
 [248.8  27.1  22.9  18.9]
 [ 70.6  16.   40.8  10.5]
 [292.9  28.3  43.2  21.4]
 [112.9  17.4  38.6  11.9]
 [ 97.2   1.5  30.   13.2]
 [265.6  20.    0.3  17.4]
 [ 95.7   1.4   7.4  11.9]
 [290.7   4.1   8.5  17.8]
 [266.9  43.8   5.   25.4]
 

In [57]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# Read the advertising table from disk and show it
file = pd.read_csv('/content/advertising.csv')
print(file)

# Same values as a NumPy array
data = np.array(file)
print(data)

# Features are all columns but the last; the target is the last column
X, Y = data[:, :-1], data[:, -1]
print(X)
print(Y)

# Scale each feature by its column maximum
X = X.astype(float)  # float copy so the division below is floating point
X_max = X.max(axis=0)
X_max = np.where(X_max == 0, 1, X_max)  # a zero maximum is replaced by 1 to avoid dividing by zero
X = X / X_max
print(X)
print(X.shape)

class LinearRegression:
    """Linear regression fitted by batch gradient descent on the mean squared error.

    Predictions are ``X @ weights``. There is no separate intercept term, so
    callers who want a bias must append a constant column to ``X``.
    """

    def __init__(self, alpha=0.01, max_iters=None, threshold=1e-6):
        """Store the hyper-parameters.

        Args:
            alpha: learning rate of the gradient-descent update.
            max_iters: number of iterations to run; ``None`` means "iterate
                until the loss changes by less than ``threshold``".
            threshold: loss-change tolerance used when ``max_iters`` is None.
        """
        self.alpha = alpha
        self.max_iters = max_iters
        self.threshold = threshold
        self.weights = None

    def initializeWeights(self, n_features):
        """Draw the starting weights uniformly from [0, 1) (NumPy global RNG)."""
        self.weights = np.random.rand(n_features)

    def loss(self, X, Y):
        """Return the mean squared error of the current weights on ``X`` / ``Y``."""
        predictions = np.dot(X, self.weights)
        errors = predictions - Y
        mse = np.mean(errors ** 2)
        return mse

    def gradientDescent(self, X, Y):
        """Apply one gradient step: w -= alpha * (2 / n) * X^T (Xw - Y)."""
        n_samples = X.shape[0]
        predictions = np.dot(X, self.weights)
        errors = predictions - Y
        gradient = (2 / n_samples) * np.dot(X.T, errors)
        self.weights -= self.alpha * gradient

    def Train(self, X, Y, alpha=0.01, max_iters=None, print_loss_iter=100):
        """Fit the weights to ``X`` / ``Y`` starting from random values.

        Args:
            X: 2-D array-like of features, one row per sample.
            Y: 1-D array-like of targets.
            alpha: learning rate for this run (stored on the instance).
            max_iters: iteration count for this run. When omitted (None) the
                value given to the constructor is used; if that is None too,
                training runs until the loss change drops below
                ``self.threshold``. 0 performs no updates.
            print_loss_iter: print the loss every this many epochs; 0 or None
                disables the periodic printing.
        """
        self.alpha = alpha
        # Only replace the stored iteration cap when the caller supplies one;
        # otherwise the constructor's setting stays in force.
        if max_iters is not None:
            self.max_iters = max_iters
        n_iters = self.max_iters

        X = np.asarray(X, dtype=float)
        Y = np.asarray(Y, dtype=float)
        n_samples, n_features = X.shape
        self.initializeWeights(n_features)

        prev_loss = float('inf')
        epoch = 0

        # `is None` (not truthiness) so that max_iters=0 means "zero iterations".
        while n_iters is None or epoch < n_iters:
            epoch += 1
            # Perform one step of gradient descent
            self.gradientDescent(X, Y)

            # Compute current loss
            current_loss = self.loss(X, Y)

            # Print loss if required
            if print_loss_iter and epoch % print_loss_iter == 0:
                print(f'Epoch {epoch}, Loss: {current_loss}')

            # Stopping criteria apply only when no iteration cap is set
            if n_iters is None:
                if not np.isfinite(current_loss):
                    # A diverged loss can never meet the threshold test;
                    # stop instead of looping forever.
                    print(f'Stopping at epoch {epoch}: loss is no longer finite ({current_loss})')
                    break
                loss_change = abs(prev_loss - current_loss)
                if loss_change < self.threshold:
                    print(f'Stopping at epoch {epoch}, Loss: {current_loss}, Change in loss: {loss_change}')
                    break

            prev_loss = current_loss

    def Predict(self, X):
        """Return the predicted target for each row of ``X``."""
        return np.dot(X, self.weights)

    def Get_Weights(self):
        """Return the current weight vector (None before Train is called)."""
        return self.weights


model = LinearRegression(alpha=0.01, max_iters=1000)
# Pass the learning rate and the iteration cap to Train explicitly so this run
# uses exactly 1000 iterations regardless of Train's own default arguments.
model.Train(X, Y, alpha=0.01, max_iters=1000)

# Make predictions
predictions = model.Predict(X)
print(predictions)

# Get model weights
weights = model.Get_Weights()
print(weights)


        TV  Radio  Newspaper  Sales
0    230.1   37.8       69.2   22.1
1     44.5   39.3       45.1   10.4
2     17.2   45.9       69.3   12.0
3    151.5   41.3       58.5   16.5
4    180.8   10.8       58.4   17.9
..     ...    ...        ...    ...
195   38.2    3.7       13.8    7.6
196   94.2    4.9        8.1   14.0
197  177.0    9.3        6.4   14.8
198  283.6   42.0       66.2   25.5
199  232.1    8.6        8.7   18.4

[200 rows x 4 columns]
[[230.1  37.8  69.2  22.1]
 [ 44.5  39.3  45.1  10.4]
 [ 17.2  45.9  69.3  12. ]
 [151.5  41.3  58.5  16.5]
 [180.8  10.8  58.4  17.9]
 [  8.7  48.9  75.    7.2]
 [ 57.5  32.8  23.5  11.8]
 [120.2  19.6  11.6  13.2]
 [  8.6   2.1   1.    4.8]
 [199.8   2.6  21.2  15.6]
 [ 66.1   5.8  24.2  12.6]
 [214.7  24.    4.   17.4]
 [ 23.8  35.1  65.9   9.2]
 [ 97.5   7.6   7.2  13.7]
 [204.1  32.9  46.   19. ]
 [195.4  47.7  52.9  22.4]
 [ 67.8  36.6 114.   12.5]
 [281.4  39.6  55.8  24.4]
 [ 69.2  20.5  18.3  11.3]
 [147.3  23.9  19.1  14.6]
 [21