In [1]:
import numpy as np 
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
import pickle

In [None]:
# Read the penguin dataset from the CSV in the working directory; later cells
# take the feature columns and the 'gender_target' label from this frame.
corelated_df = pd.read_csv(filepath_or_buffer="penguins_corelated_task_2.csv")

In [None]:
class Logit_Regression():
    """Binary logistic regression trained with full-batch gradient descent.

    Parameters
    ----------
    learning_rate : float
        Step size applied to every gradient update.
    iterations_count : int
        Number of gradient-descent steps performed by ``fit``.
    random_state : int or None, optional
        Seed used for the weight initialisation. With ``None`` (default) the
        weights are drawn from NumPy's global random state, exactly as before.

    Attributes
    ----------
    weights : numpy.ndarray of shape (n_features, 1) or None
        Learned coefficients; ``None`` until ``fit`` is called.
    bias : float or None
        Learned intercept; ``None`` until ``fit`` is called.
    loss : list of float
        Cross-entropy recorded at every iteration of the latest ``fit`` call.
    """

    # Probabilities are clipped to [_EPS, 1 - _EPS] before taking logarithms so
    # a saturated sigmoid cannot produce log(0) = -inf (and 0 * -inf = nan).
    _EPS = 1e-15
    # float64 exp() overflows a little above 709; clipping the logit well below
    # that keeps sigmoid free of overflow warnings without changing its value
    # in any meaningful way (sigmoid(+-500) is already 1 / ~7e-218).
    _Z_LIMIT = 500.0

    def __init__(self, learning_rate, iterations_count, random_state=None):
        self.learning_rate = learning_rate
        self.iterations_count = iterations_count
        self.random_state = random_state
        self.weights = None
        self.loss = []
        self.bias = None

    @staticmethod
    def _as_column(values):
        """Return ``values`` as a float array of shape (N, 1).

        Labels passed as a flat (N,) array would otherwise broadcast against
        the (N, 1) predictions into an (N, N) matrix and silently corrupt the
        gradient, the loss and the accuracy.
        """
        return np.asarray(values, dtype=float).reshape(-1, 1)

    def sigmoid(self, z):
        """Logistic function 1 / (1 + exp(-z)), evaluated without overflow."""
        z = np.clip(z, -self._Z_LIMIT, self._Z_LIMIT)
        return 1 / (1 + np.exp(-z))

    def cost(self, y, y_hat):
        """Mean binary cross-entropy between labels ``y`` and probabilities ``y_hat``.

        Both arguments may be flat or column-shaped; the result is always finite.
        """
        y = self._as_column(y)
        y_hat = np.clip(self._as_column(y_hat), self._EPS, 1 - self._EPS)
        N = len(y)
        return (1/N) * np.sum(-y * np.log(y_hat) - (1 - y) * np.log(1 - y_hat))

    def gradient_descent(self, X, y):
        """Perform one gradient-descent step on ``weights`` and ``bias``.

        Returns
        -------
        (weights, y_hat)
            The updated weights and the probabilities that were computed
            *before* the update (these are what ``fit`` logs the loss on).
        """
        y = self._as_column(y)
        N = len(y)
        z = np.dot(X, self.weights) + self.bias
        y_hat = self.sigmoid(z)
        delta = y_hat - y
        dW = np.dot(np.transpose(X), delta) / N
        db = np.sum(delta) / N
        self.bias = self.bias - self.learning_rate * db
        self.weights = self.weights - self.learning_rate * dW
        return self.weights, y_hat

    def fit(self, X, y, verbose=True):
        """Train the model on ``X`` (n_samples, n_features) and binary labels ``y``.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
        y : array-like of shape (n_samples,) or (n_samples, 1)
        verbose : bool, optional
            When True (default) the loss is printed at every iteration.
        """
        X = np.asarray(X, dtype=float)
        y = self._as_column(y)
        shape = (X.shape[1], 1)
        if self.random_state is None:
            # Legacy behaviour: honour whatever np.random.seed() the caller set.
            self.weights = np.random.uniform(0, 1, size=shape)
        else:
            self.weights = np.random.default_rng(self.random_state).uniform(0, 1, size=shape)
        self.bias = 0
        # Start a fresh history so a second fit() does not append to the first
        # run's losses (which would also break loss_graph).
        self.loss = []
        for i in range(self.iterations_count):
            self.weights, y_hat = self.gradient_descent(X, y)
            c = self.cost(y, y_hat)
            self.loss.append(c)
            if verbose:
                print(f"Iteration {i}:\nLoss is {c}\n")

    def predict(self, X):
        """Return the predicted probability of the positive class, shape (n_samples, 1)."""
        z = np.dot(X, self.weights) + self.bias
        y_hat = self.sigmoid(z)
        return y_hat

    def accuracy(self, y, y_hat):
        """Fraction of samples whose label and prediction agree after thresholding at 0.5."""
        y_hat_bin = (self._as_column(y_hat) > 0.5).astype(int)
        y_bin = (self._as_column(y) > 0.5).astype(int)
        predictions = np.sum(y_hat_bin == y_bin)
        acc = predictions / len(y_bin)
        return acc

    def loss_graph(self):
        """Plot the loss recorded by the latest ``fit`` call against the iteration index."""
        # Use the actual history length so x and y always have matching sizes.
        plt.plot(range(len(self.loss)), self.loss)
        plt.xlabel('Iterations')
        plt.ylabel('Loss')
        plt.title('Loss Over Iterations')
        plt.show()

    def save_model_to_pickle(model, filename="model_weights.pkl"):
        """Pickle ``{'weights': ..., 'bias': ...}`` of ``model`` to ``filename``.

        The first parameter is deliberately named ``model`` rather than
        ``self``: the function works both as
        ``Logit_Regression.save_model_to_pickle(model, filename=...)`` and as
        ``model.save_model_to_pickle(filename=...)``.
        """
        with open(filename, 'wb') as f:
            pickle.dump({'weights': model.weights, 'bias': model.bias}, f)
        print(f"Model weights saved to {filename}")




In [None]:
# Seed NumPy's global RNG so that the shuffle below -- and the weight
# initialisation in Logit_Regression.fit, which also draws from np.random --
# give the same result on every Restart & Run All.
SEED = 42
np.random.seed(SEED)

# --- Build the feature matrix / label vector and an 80/20 random split ------
# NOTE(review): the features are passed to the model as stored in the CSV; if
# they are not already standardised, confirm whether scaling is needed.
X = corelated_df[['bill_length_mm', 'bill_depth_mm', 'flipper_length_mm', 'body_mass_g']].values
y = corelated_df['gender_target'].values.reshape(-1, 1)
N = X.shape[0]
train_size = int(0.8 * N)
index_number = np.arange(N)
np.random.shuffle(index_number)
train_indices = index_number[:train_size]
test_indices = index_number[train_size:]
X_train = X[train_indices]
y_train = y[train_indices]
X_test = X[test_indices]
y_test = y[test_indices]
print("X_train shape:", X_train.shape)
print("y_train shape:", y_train.shape)
print("X_test shape:", X_test.shape)
print("y_test shape:", y_test.shape)


def _train_and_evaluate(learning_rate, iterations_count):
    """Fit a Logit_Regression on the training split and score it on the test split.

    Returns a tuple ``(model, test_probabilities, test_accuracy)`` where the
    accuracy is a fraction in [0, 1].
    """
    fitted = Logit_Regression(learning_rate, iterations_count)
    fitted.fit(X_train, y_train)
    probabilities = fitted.predict(X_test)
    return fitted, probabilities, fitted.accuracy(y_test, probabilities)


# --- Three hyper-parameter settings, trained in the original order ----------
learning_rate_0 = 0.001
iterations_count_0 = 10000
model_0, y_pred_0, test_accuracy_0 = _train_and_evaluate(learning_rate_0, iterations_count_0)

learning_rate = 0.001
iterations_count = 50000
model, y_pred, test_accuracy = _train_and_evaluate(learning_rate, iterations_count)

learning_rate_1 = 0.01
iterations_count_1 = 100000
model_1, y_pred_1, test_accuracy_1 = _train_and_evaluate(learning_rate_1, iterations_count_1)


# --- Report ------------------------------------------------------------------
print("Test Accuracy 0: ", test_accuracy_0 * 100)
model_0.loss_graph()
print("Test Accuracy: ", test_accuracy * 100)
model.loss_graph()
print("Test Accuracy 1: ", test_accuracy_1 * 100)
model_1.loss_graph()

# --- Persist the best model --------------------------------------------------
# max() returns the first maximal element, so on ties the preference order is
# model, then model_1, then model_0.
# NOTE(review): the winner is chosen on the test split, so the reported best
# accuracy is an optimistic estimate; consider a separate validation split.
candidates = [
    (test_accuracy, model),
    (test_accuracy_1, model_1),
    (test_accuracy_0, model_0),
]
best_acc, best_model = max(candidates, key=lambda pair: pair[0])
print("Best Accuracy: ", best_acc * 100)
Logit_Regression.save_model_to_pickle(best_model, filename="model_weights.pkl")