In [13]:
import io

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from google.colab import files

# Read training data from uploaded file
uploaded = files.upload()  # This will prompt you to upload the 'Training data.xlsx' file

# When a file with the same name already exists, Colab stores the new upload
# under a different name (e.g. "Training data (5).xlsx"), so opening the fixed
# path would load an older copy. Prefer the bytes returned by the upload and
# fall back to the path only when the expected name was not uploaded.
if 'Training data.xlsx' in uploaded:
    data = pd.read_excel(io.BytesIO(uploaded['Training data.xlsx']))
else:
    data = pd.read_excel('Training data.xlsx')
# First eight columns are the features; the ninth is the target, kept as a
# column vector of shape (n_samples, 1).
x_train = np.array(data.iloc[:, 0:8])
y_train = np.array(data.iloc[:, 8]).reshape(-1, 1)

# Function to check if a column is numeric
def is_numeric(col):
    """Return True when every entry of ``col`` can be cast to ``np.float64``.

    Parameters
    ----------
    col : numpy.ndarray
        One column of the feature matrix (any dtype, including ``object``).

    Returns
    -------
    bool
        ``True`` if ``col.astype(np.float64)`` succeeds, ``False`` otherwise.
    """
    try:
        col.astype(np.float64)
    except (ValueError, TypeError):
        # ValueError: strings that do not parse as numbers.
        # TypeError: objects with no float conversion at all (e.g. datetimes).
        return False
    return True

# Feature changing without sklearn
def feature_changing(x_train, category_maps=None):
    """Replace each non-numeric column with integer category codes.

    A column is treated as non-numeric when ``is_numeric`` rejects it. Its
    distinct values, in the order returned by ``np.unique``, are numbered
    0, 1, 2, ... and every entry is overwritten with its number.

    Parameters
    ----------
    x_train : numpy.ndarray
        2-D feature matrix. It is modified in place.
    category_maps : dict, optional
        ``{column_index: {category: code}}``. When supplied, a mapping already
        stored for a column is reused instead of being rebuilt, and any newly
        built mapping is stored into the dict. Passing the same dict when
        encoding training and test data gives both the same codes. With the
        default ``None`` a fresh mapping is derived from the given array on
        every call.

    Returns
    -------
    numpy.ndarray
        The same array object that was passed in.

    Raises
    ------
    KeyError
        If a reused mapping has no code for a value found in the column.
    """
    for col in range(x_train.shape[1]):
        column = x_train[:, col]
        if is_numeric(column):
            continue
        mapping = None if category_maps is None else category_maps.get(col)
        if mapping is None:
            mapping = {category: idx for idx, category in enumerate(np.unique(column))}
            if category_maps is not None:
                category_maps[col] = mapping
        # ``column`` is a view of ``x_train``; the list is fully built before
        # the assignment overwrites the values it was read from.
        x_train[:, col] = [mapping[value] for value in column]
    return x_train

# Replace non-numeric feature columns with integer category codes so the
# matrix can be cast to float64 further down.
x_train = feature_changing(x_train)

# Z-score normalization
def z_score(x_train):
    """Standardize every column to zero mean and unit standard deviation.

    Parameters
    ----------
    x_train : numpy.ndarray
        Numeric feature matrix; statistics are taken along axis 0. The input
        array is not modified.

    Returns
    -------
    x_train : numpy.ndarray
        The standardized copy of the input.
    x_std : numpy.ndarray
        Per-column divisor that was applied: the population standard
        deviation, with 1.0 substituted where the deviation is zero.
    x_mean : numpy.ndarray
        Per-column mean that was subtracted.
    """
    x_mean = np.mean(x_train, axis=0)
    x_std = np.std(x_train, axis=0)
    # A constant column has zero deviation; dividing by it yields 0/0 = NaN,
    # which then propagates into the cost and the weights. Dividing by 1
    # instead leaves such a column at exactly 0 after centering.
    x_std = np.where(x_std == 0, 1.0, x_std)
    x_train = (x_train - x_mean) / x_std
    return x_train, x_std, x_mean

# Cast the encoded matrix to float64, then standardize it. The per-column
# std and mean are kept because the test features are scaled with the same
# statistics later in the script.
x_train = x_train.astype(np.float64)
x_train, x_std, x_mean = z_score(x_train)

# Cost function using Mean Square Error
def cost(x_train, y_train, w, b):
    """Return the halved mean squared error of the linear model ``x @ w + b``.

    Parameters
    ----------
    x_train : array-like
        Feature matrix with one row per sample.
    y_train : array-like
        Target values, one per sample.
    w : array-like
        Weights.
    b : float or array-like
        Bias added to every prediction.

    Returns
    -------
    float
        ``sum((prediction - target) ** 2) / (2 * m)`` where ``m`` is
        ``len(y_train)``.
    """
    m = len(y_train)
    y_pred = np.dot(x_train, w) + b
    y_true = np.asarray(y_train)
    # A flat target against a column-vector prediction (or the reverse) would
    # broadcast to an (m, m) matrix and silently inflate the loss. When both
    # hold the same number of values, align the target to the prediction.
    if y_true.shape != y_pred.shape and y_true.size == y_pred.size:
        y_true = y_true.reshape(y_pred.shape)
    loss = (1/(2*m)) * np.sum((y_pred - y_true)**2)
    return loss

# Gradient Descent
def gradient_descent(x_train, y_train, w, b, learning_rate=0.01, num_iterations=1000):
    """Run batch gradient descent on the halved mean squared error.

    Parameters
    ----------
    x_train : numpy.ndarray
        Feature matrix with one row per sample.
    y_train : array-like
        Target values, one per sample.
    w : array-like
        Starting weights. Not modified; a float64 copy is updated instead.
    b : float or numpy.ndarray
        Starting bias. An array argument is copied rather than modified.
    learning_rate : float, optional
        Step size applied to both gradients.
    num_iterations : int, optional
        Number of update steps to perform.

    Returns
    -------
    w : numpy.ndarray
        Weights after the final step.
    b : float or numpy.ndarray
        Bias after the final step.
    """
    m = len(y_train)
    target = np.asarray(y_train)
    # Work on copies so the caller's starting point is left untouched; the
    # float cast also lets integer-typed starting values be updated.
    w = np.array(w, dtype=np.float64)
    if isinstance(b, np.ndarray):
        b = b.astype(np.float64)
    for _ in range(num_iterations):
        y_pred = np.dot(x_train, w) + b
        # Align a flat target with a column-vector prediction (or the
        # reverse) so the subtraction cannot broadcast to an (m, m) matrix.
        if target.shape != y_pred.shape and target.size == y_pred.size:
            target = target.reshape(y_pred.shape)
        error = y_pred - target
        dw = (1/m) * np.dot(x_train.T, error)
        db = (1/m) * np.sum(error)
        w -= learning_rate * dw
        b -= learning_rate * db
    return w, b

# Initialize weights and bias
# Fixed seed (2**31 - 1) so the random starting point is the same on every run.
np.random.seed(2147483647)
# One weight per feature column, stored as a column vector (n_features, 1).
w = np.random.randn(x_train.shape[1], 1)
# Bias drawn as a length-1 array.
b = np.random.randn(1)

# Gradient descent
# Repeat rounds of gradient descent until the cost changes by less than the
# tolerance between two consecutive rounds. The number of rounds is capped and
# the cost is checked for NaN/inf, because in either case the tolerance test
# would otherwise never become true and the cell would run forever.
old_cost = float('inf')
max_rounds = 10000
for _ in range(max_rounds):
    current_cost = cost(x_train, y_train, w, b)
    if not np.isfinite(current_cost):
        raise RuntimeError(
            f"Training diverged: cost is {current_cost}. "
            "Check the features for NaN values or lower the learning rate."
        )
    if abs(old_cost - current_cost) < 0.00001:
        break
    old_cost = current_cost
    w, b = gradient_descent(x_train, y_train, w, b)
else:
    print(f"Warning: cost did not converge within {max_rounds} rounds")

# Read test data from uploaded file
uploaded = files.upload()  # This will prompt you to upload the 'Test data.xlsx' file

# When a file with the same name already exists, Colab stores the new upload
# under a different name (e.g. "Test data (1).xlsx"), so opening the fixed
# path would load an older copy. Prefer the bytes returned by the upload and
# fall back to the path only when the expected name was not uploaded.
if 'Test data.xlsx' in uploaded:
    test_data = pd.read_excel(io.BytesIO(uploaded['Test data.xlsx']))
else:
    test_data = pd.read_excel('Test data.xlsx')

# The workbook is parsed once; features and expected answers come from the
# same frame.
x_predict = test_data.iloc[:, :8].to_numpy()
ans = test_data.iloc[:, 8].to_numpy()

# NOTE(review): the category codes here are derived from the values present in
# the test file, so they only match the training codes if both files contain
# the same set of categories in every non-numeric column - confirm.
x_predict = feature_changing(x_predict)
# Cast to float64 like the training matrix, then scale with the training
# mean and standard deviation.
x_predict = (x_predict.astype(np.float64) - x_mean) / x_std

y_predict = np.dot(x_predict, w) + b

# Calculating accuracy
# A prediction counts as correct when it is within +- 0.5 of the expected value.
hits = np.count_nonzero(np.abs(y_predict.reshape(-1) - ans) < 0.5)
accuracy = round(hits * 100 / len(ans), 2)
ok = 'Congratulations' if accuracy > 95 else 'Optimization required'
print(f"{ok}, your accuracy is {accuracy}%")


Saving Training data.xlsx to Training data (5).xlsx


Saving Test data.xlsx to Test data (1).xlsx
Congratulations, your accuracy is 100.0%
