In [19]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler, OneHotEncoder
from sklearn.metrics import mean_squared_error, r2_score, accuracy_score, mean_absolute_error
from sklearn.linear_model import LogisticRegression
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor
import matplotlib.pyplot as plt
import seaborn as sns
from scipy.stats import skew



dataset = pd.read_csv("/content/dengue_cases_created.csv")


In [20]:
X = dataset.iloc[:, :-1].values
Y = dataset.iloc[:, -1].values


In [21]:
ct = ColumnTransformer(
    transformers=[('encoder', OneHotEncoder(sparse_output=False), [0, 5, 6, 7, 8])],
    remainder='passthrough'
)

In [22]:
X = ct.fit_transform(X)
X = X.astype(float)

In [23]:
from sklearn.model_selection import train_test_split
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.2, random_state=1)
print(X_train)
print(Y_train)
print(X_test)
print(Y_test)


[[0. 1. 0. ... 1. 0. 1.]
 [1. 0. 0. ... 1. 1. 1.]
 [1. 0. 0. ... 1. 0. 1.]
 ...
 [0. 1. 0. ... 1. 1. 0.]
 [0. 1. 0. ... 0. 1. 0.]
 [1. 0. 0. ... 1. 1. 0.]]
[1 1 0 ... 1 1 1]
[[0. 1. 0. ... 1. 1. 1.]
 [0. 1. 0. ... 0. 0. 1.]
 [0. 1. 0. ... 1. 0. 0.]
 ...
 [1. 0. 0. ... 0. 0. 0.]
 [0. 1. 0. ... 0. 1. 0.]
 [0. 1. 0. ... 1. 1. 0.]]
[1 1 0 ... 1 1 1]


In [24]:
class LinearRegressionScratch:
    def __init__(self):
        self.coef_ = None

    def fit(self, X, Y):
        X_b = np.c_[np.ones((X.shape[0], 1)), X]  # Add intercept term
        self.coef_ = np.linalg.inv(X_b.T.dot(X_b)).dot(X_b.T).dot(Y)

    def predict(self, X):
        X_b = np.c_[np.ones((X.shape[0], 1)), X]  # Add intercept term
        return X_b.dot(self.coef_)

# Train and evaluate Linear Regression
lr_model = LinearRegressionScratch()
lr_model.fit(X_train, Y_train)
lr_predictions = lr_model.predict(X_test)

In [25]:
class LogisticRegressionScratch:
    def __init__(self, learning_rate=0.01, num_iterations=1000):
        self.learning_rate = learning_rate
        self.num_iterations = num_iterations
        self.coef_ = None

    def sigmoid(self, z):
        return 1 / (1 + np.exp(-z))

    def fit(self, X, Y):
        X_b = np.c_[np.ones((X.shape[0], 1)), X]  # Add intercept term
        m = X_b.shape[0]
        self.coef_ = np.zeros(X_b.shape[1])

        for _ in range(self.num_iterations):
            predictions = self.sigmoid(X_b.dot(self.coef_))
            gradient = (1 / m) * X_b.T.dot(predictions - Y)
            self.coef_ -= self.learning_rate * gradient

    def predict(self, X):
        X_b = np.c_[np.ones((X.shape[0], 1)), X]  # Add intercept term
        probabilities = self.sigmoid(X_b.dot(self.coef_))
        return (probabilities >= 0.5).astype(int)

# Train and evaluate Logistic Regression
log_reg_model = LogisticRegressionScratch()
log_reg_model.fit(X_train, Y_train)
log_reg_predictions = log_reg_model.predict(X_test)

In [26]:
rf_model = RandomForestRegressor(n_estimators=100, random_state=42)
rf_model.fit(X_train, Y_train)
rf_predictions = rf_model.predict(X_test)  # Store the predictions

In [27]:

# Evaluation Metrics for Linear Regression
lr_mse = mean_squared_error(Y_test, lr_predictions)
lr_r2 = r2_score(Y_test, lr_predictions)
lr_mae = mean_absolute_error(Y_test, lr_predictions)

# Evaluation Metrics for Logistic Regression
log_reg_accuracy = accuracy_score(Y_test, log_reg_predictions)

# Evaluation Metrics for Random Forest
rf_mse = mean_squared_error(Y_test, rf_predictions)
rf_r2 = r2_score(Y_test, rf_predictions)
rf_mae = mean_absolute_error(Y_test, rf_predictions)

# Print results for comparison
print("Linear Regression:")
print(f"MSE: {lr_mse}")
print(f"R²: {lr_r2}")
print(f"MAE: {lr_mae}\n")

print("Logistic Regression:")
print(f"Accuracy: {log_reg_accuracy}\n")

print("Random Forest Regression:")
print(f"MSE: {rf_mse}")
print(f"R²: {rf_r2}")
print(f"MAE: {rf_mae}\n")


Linear Regression:
MSE: 2609.091800610097
R²: -10440.128255936446
MAE: 42.56594153159556

Logistic Regression:
Accuracy: 0.4893230349840981

Random Forest Regression:
MSE: 0.25601707323388617
R²: -0.02453547120090338
MAE: 0.45881696090521623

