## Assignment 1

In [3]:
from ucimlrepo import fetch_ucirepo 
  
# fetch dataset 
mushroom = fetch_ucirepo(id=73) 
  
# data (as pandas dataframes) 
X = mushroom.data.features 
y = mushroom.data.targets 
  
# metadata 
print(mushroom.metadata) 
  
# variable information 
print(mushroom.variables) 


{'uci_id': 73, 'name': 'Mushroom', 'repository_url': 'https://archive.ics.uci.edu/dataset/73/mushroom', 'data_url': 'https://archive.ics.uci.edu/static/public/73/data.csv', 'abstract': 'From Audobon Society Field Guide; mushrooms described in terms of physical characteristics; classification: poisonous or edible', 'area': 'Biology', 'tasks': ['Classification'], 'characteristics': ['Multivariate'], 'num_instances': 8124, 'num_features': 22, 'feature_types': ['Categorical'], 'demographics': [], 'target_col': ['poisonous'], 'index_col': None, 'has_missing_values': 'yes', 'missing_values_symbol': 'NaN', 'year_of_dataset_creation': 1981, 'last_updated': 'Thu Aug 10 2023', 'dataset_doi': '10.24432/C5959T', 'creators': [], 'intro_paper': None, 'additional_info': {'summary': "This data set includes descriptions of hypothetical samples corresponding to 23 species of gilled mushrooms in the Agaricus and Lepiota Family (pp. 500-525).  Each species is identified as definitely edible, definitely po

In [4]:
X

Unnamed: 0,cap-shape,cap-surface,cap-color,bruises,odor,gill-attachment,gill-spacing,gill-size,gill-color,stalk-shape,...,stalk-surface-below-ring,stalk-color-above-ring,stalk-color-below-ring,veil-type,veil-color,ring-number,ring-type,spore-print-color,population,habitat
0,x,s,n,t,p,f,c,n,k,e,...,s,w,w,p,w,o,p,k,s,u
1,x,s,y,t,a,f,c,b,k,e,...,s,w,w,p,w,o,p,n,n,g
2,b,s,w,t,l,f,c,b,n,e,...,s,w,w,p,w,o,p,n,n,m
3,x,y,w,t,p,f,c,n,n,e,...,s,w,w,p,w,o,p,k,s,u
4,x,s,g,f,n,f,w,b,k,t,...,s,w,w,p,w,o,e,n,a,g
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
8119,k,s,n,f,n,a,c,b,y,e,...,s,o,o,p,o,o,p,b,c,l
8120,x,s,n,f,n,a,c,b,y,e,...,s,o,o,p,n,o,p,b,v,l
8121,f,s,n,f,n,a,c,b,n,e,...,s,o,o,p,o,o,p,b,c,l
8122,k,y,n,f,y,f,c,n,b,t,...,k,w,w,p,w,o,e,w,v,l


In [5]:
y

Unnamed: 0,poisonous
0,p
1,e
2,e
3,p
4,e
...,...
8119,e
8120,e
8121,e
8122,p


#### Custom Logistic Regression Implementation and Evaluation with a Dummy Dataset

In [19]:
import numpy as np

class LogisticRegressionModel:
    def __init__(self, learning_rate=0.01, epochs=1000):
        self.learning_rate = learning_rate
        self.epochs = epochs
        self.weights = None
        self.bias = None

    def _sigmoid(self, z):
        return 1 / (1 + np.exp(-z))

    def fit(self, X, y):
        n_samples, n_features = X.shape
        self.weights = np.zeros(n_features)
        self.bias = 0

        # Gradient Descent
        for _ in range(self.epochs):
            model = np.dot(X, self.weights) + self.bias
            y_predicted = self._sigmoid(model)

            error = y_predicted - y
            dw = (1 / n_samples) * np.dot(X.T, error * y_predicted * (1 - y_predicted))
            db = (1 / n_samples) * np.sum(error * y_predicted * (1 - y_predicted))

            # Update parameters
            self.weights -= self.learning_rate * dw
            self.bias -= self.learning_rate * db

    def predict(self, X):
        linear_model = np.dot(X, self.weights) + self.bias
        y_predicted = self._sigmoid(linear_model)
        y_predicted_cls = [1 if i > 0.5 else 0 for i in y_predicted]
        return y_predicted_cls

# Usage example with a dummy dataset
np.random.seed(0)
X_dummy = np.random.rand(100, 5)  # 100 samples, 5 features each
y_dummy = np.random.randint(0, 2, 100)  # Binary targets

# Create the Logistic Regression model
log_reg_model = LogisticRegressionModel(learning_rate=0.01, epochs=1000)

# Fit the model
log_reg_model.fit(X_dummy, y_dummy)

# After fitting the model, print the updated weights and bias
print("Updated weights:", log_reg_model.weights)
print("Updated bias:", log_reg_model.bias)

# Predict using the model
y_pred_dummy = log_reg_model.predict(X_dummy)

# Evaluate the dummy predictions
accuracy_dummy = np.mean(y_pred_dummy == y_dummy)
print(f'Dummy Accuracy: {accuracy_dummy:.2f}')


Updated weights: [-0.09168462  0.00289753 -0.04169774  0.01148246  0.00544851]
Updated bias: -0.057496310387814706
Dummy Accuracy: 0.54


#### Applying the Logistic Regression Model to the Mushroom Dataset

In [20]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.metrics import accuracy_score
from ucimlrepo import fetch_ucirepo

# Fetch the dataset
mushroom = fetch_ucirepo(id=73)

# Data as pandas dataframes
X = mushroom.data.features
y = mushroom.data.targets

# Initialize LabelEncoder
encoder = LabelEncoder()

# Encode target labels
y_encoded = y.apply(encoder.fit_transform)

# Encode categorical features
X_encoded = pd.get_dummies(X, drop_first=True)

# Split the dataset into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X_encoded, y_encoded, test_size=0.2, random_state=42)

# Standardize features by removing the mean and scaling to unit variance
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

class LogisticRegressionModel:
    def __init__(self, learning_rate=0.01, epochs=1000):
        self.learning_rate = learning_rate
        self.epochs = epochs
        self.weights = None
        self.bias = None

    def _sigmoid(self, z):
        return 1 / (1 + np.exp(-z))

    def fit(self, X, y):
        n_samples, n_features = X.shape
        self.weights = np.zeros(n_features)
        self.bias = 0

        # Gradient Descent
        for _ in range(self.epochs):
            model = np.dot(X, self.weights) + self.bias
            y_predicted = self._sigmoid(model)

            error = y_predicted - y
            dw = (1 / n_samples) * np.dot(X.T, error * y_predicted * (1 - y_predicted))
            db = (1 / n_samples) * np.sum(error * y_predicted * (1 - y_predicted))

            # Update parameters
            self.weights -= self.learning_rate * dw
            self.bias -= self.learning_rate * db

    def predict(self, X):
        linear_model = np.dot(X, self.weights) + self.bias
        y_predicted = self._sigmoid(linear_model)
        y_predicted_cls = [1 if i > 0.5 else 0 for i in y_predicted]
        return y_predicted_cls

# Initialize the logistic regression model
log_reg_model = LogisticRegressionModel(learning_rate=0.01, epochs=1000)

# Train the model on the training data
log_reg_model.fit(X_train_scaled, y_train.values.ravel())

# Predict on the testing data
y_pred = log_reg_model.predict(X_test_scaled)

# Calculate the accuracy of the model
accuracy = accuracy_score(y_test, y_pred)
print(f'Accuracy: {accuracy:.2f}')


Accuracy: 0.99


####  Complete Logistic Regression Workflow with the Mushroom Dataset

In [21]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.metrics import accuracy_score

class LogisticRegressionModel:
    def __init__(self, learning_rate=0.01, epochs=1000):
        self.learning_rate = learning_rate
        self.epochs = epochs
        self.weights = None
        self.bias = None

    def _sigmoid(self, z):
        return 1 / (1 + np.exp(-z))

    def fit(self, X, y):
        n_samples, n_features = X.shape
        self.weights = np.zeros(n_features)
        self.bias = 0

        # Gradient Descent
        for _ in range(self.epochs):
            model = np.dot(X, self.weights) + self.bias
            y_predicted = self._sigmoid(model)

            error = y_predicted - y
            dw = (1 / n_samples) * np.dot(X.T, error * y_predicted * (1 - y_predicted))
            db = (1 / n_samples) * np.sum(error * y_predicted * (1 - y_predicted))

            # Update parameters
            self.weights -= self.learning_rate * dw
            self.bias -= self.learning_rate * db

    def predict(self, X):
        linear_model = np.dot(X, self.weights) + self.bias
        y_predicted = self._sigmoid(linear_model)
        y_predicted_cls = [1 if i > 0.5 else 0 for i in y_predicted]
        return y_predicted_cls


X = mushroom.data.features
y = mushroom.data.targets

# Initialize LabelEncoder
encoder = LabelEncoder()

# Encode target labels with value between 0 and n_classes-1.
y_encoded = encoder.fit_transform(y['poisonous'])

# Encode categorical features using one-hot encoding
X_encoded = pd.get_dummies(X, drop_first=True)

# Split the dataset into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X_encoded, y_encoded, test_size=0.2, random_state=42)

# Standardize features
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Initialize and train your logistic regression model
log_reg_model = LogisticRegressionModel(learning_rate=0.01, epochs=1000)
log_reg_model.fit(X_train_scaled, y_train)

# Make predictions
y_pred = log_reg_model.predict(X_test_scaled)

# Inverse transform the encoded predictions to their original label (p or e)
y_pred_labels = encoder.inverse_transform(y_pred)

# Print classification results
for i, label in enumerate(y_pred_labels):
    classification = 'poisonous' if label == 'p' else 'edible'
    print(f"Sample {i}: {classification}")


Sample 0: edible
Sample 1: poisonous
Sample 2: poisonous
Sample 3: edible
Sample 4: poisonous
Sample 5: poisonous
Sample 6: poisonous
Sample 7: poisonous
Sample 8: edible
Sample 9: edible
Sample 10: edible
Sample 11: edible
Sample 12: edible
Sample 13: edible
Sample 14: edible
Sample 15: edible
Sample 16: edible
Sample 17: poisonous
Sample 18: edible
Sample 19: edible
Sample 20: edible
Sample 21: edible
Sample 22: poisonous
Sample 23: edible
Sample 24: poisonous
Sample 25: edible
Sample 26: edible
Sample 27: edible
Sample 28: edible
Sample 29: poisonous
Sample 30: poisonous
Sample 31: poisonous
Sample 32: edible
Sample 33: edible
Sample 34: edible
Sample 35: poisonous
Sample 36: edible
Sample 37: poisonous
Sample 38: poisonous
Sample 39: poisonous
Sample 40: poisonous
Sample 41: edible
Sample 42: poisonous
Sample 43: edible
Sample 44: poisonous
Sample 45: edible
Sample 46: poisonous
Sample 47: edible
Sample 48: poisonous
Sample 49: edible
Sample 50: poisonous
Sample 51: poisonous
Sampl