# Data Preparation

In [None]:
import pandas as pd

# Read the raw dataset from a CSV file
file_path = r'C:\Users\Documents\Dummy Data HSS.csv'
df = pd.read_csv(file_path)

# Display the original DataFrame
print("Original DataFrame:")
print(df)
print("\n")

# Report how many missing values each column contains
missing_values = df.isnull().sum()
print("Missing values in the dataset:")
print(missing_values)
print("\n")

# Remove rows with missing values
df.dropna(inplace=True)
print("DataFrame after handling missing values:")
print(df)
print("\n")

# Remove duplicate rows
df.drop_duplicates(inplace=True)
print("DataFrame after removing duplicates:")
print(df)
print("\n")

# Encode the "Influencer" column as a numeric influence level:
# 4 for Mega, 3 for Macro, 2 for Nano, 1 for Micro.
# The result is assigned back to the column instead of calling
# replace(inplace=True) on the selected Series: the in-place form is chained
# assignment, which warns on pandas 2.x and does not modify `df` once
# Copy-on-Write is active.
influencer_levels = {'Mega': 4, 'Macro': 3, 'Nano': 2, 'Micro': 1}
df['Influencer'] = df['Influencer'].replace(influencer_levels)
print(df)
print("\n")

# Save the cleaned DataFrame to a new CSV file (without the index column)
cleaned_file_path = 'Data_Clean.csv'
df.to_csv(cleaned_file_path, index=False)
print(f"Cleaned data saved to: {cleaned_file_path}")

# Linear Regression Algorithm & Evaluation

In [None]:
import numpy as np
import pandas as pd

# Load the cleaned dataset (Data_Clean.csv)
file_path = r'C:\Users\Documents\Data_Clean.csv'
df = pd.read_csv(file_path)

# The 'Sales' column is the target (as a NumPy array); all other columns are features
y = df['Sales'].values
X = df.drop(columns='Sales')

# Ordered 70:30 split: the first 70% of the rows train the model,
# the remaining rows are held out for testing (no shuffling)
split_index = int(len(df) * 0.7)
X_train = X.iloc[:split_index]
X_test = X.iloc[split_index:]
y_train = y[:split_index]
y_test = y[split_index:]

# Keep the feature column names available
feature_names = X.columns

# Create a linear regression model
class LinearRegressionCustom:
    """Ordinary least-squares linear regression implemented with NumPy.

    ``coef_`` and ``intercept_`` are ``None`` until :meth:`fit` has been called.
    """

    def __init__(self):
        # Per-feature coefficients and the bias term, set by fit().
        self.coef_ = None
        self.intercept_ = None

    def fit(self, X, y):
        """Estimate the intercept and coefficients from training data.

        Args:
            X: 2-D array-like of numeric features, one row per sample.
            y: Array-like of target values, one entry per sample.

        Returns:
            The fitted model itself, so calls can be chained.
        """
        X = np.asarray(X, dtype=float)
        y = np.asarray(y, dtype=float)

        # Prepend a column of ones so the first solved parameter is the intercept.
        design = np.column_stack((np.ones(len(X)), X))

        # Solve the least-squares problem directly rather than inverting
        # X^T X: explicit inversion fails (or silently returns garbage) when
        # features are collinear, whereas lstsq returns the minimum-norm
        # solution and is numerically more stable.
        beta, *_ = np.linalg.lstsq(design, y, rcond=None)

        self.intercept_ = beta[0]
        self.coef_ = beta[1:]
        return self

    def predict(self, X):
        """Return predictions ``intercept_ + X @ coef_`` for the samples in X."""
        return self.intercept_ + X @ self.coef_

# Custom functions for evaluation metrics
def calculate_mse(y_true, y_pred):
    """Return the mean squared error between true and predicted values."""
    residuals = y_true - y_pred
    return np.mean(np.square(residuals))

def calculate_mae(y_true, y_pred):
    """Return the mean absolute error between true and predicted values."""
    residuals = y_true - y_pred
    return np.mean(np.absolute(residuals))

def calculate_r2(y_true, y_pred):
    """Return the coefficient of determination (R^2) of the predictions.

    R^2 is ``1 - SS_residual / SS_total``. When ``y_true`` is constant,
    ``SS_total`` is zero and the ratio is undefined; in that case this
    follows the scikit-learn convention and returns 1.0 for a perfect
    prediction and 0.0 otherwise, instead of dividing by zero.

    Args:
        y_true: Array of observed target values.
        y_pred: Array of predicted values, same shape as ``y_true``.

    Returns:
        The R^2 score as a float.
    """
    ss_residual = np.sum((y_true - y_pred) ** 2)
    ss_total = np.sum((y_true - np.mean(y_true)) ** 2)

    # Constant target: avoid 0/0 or x/0 (nan / -inf plus a RuntimeWarning).
    if ss_total == 0:
        return 1.0 if ss_residual == 0 else 0.0

    return 1 - (ss_residual / ss_total)

def calculate_rmse(y_true, y_pred):
    """Return the root mean squared error (square root of the MSE)."""
    return np.sqrt(calculate_mse(y_true, y_pred))

# joblib is used below to persist the model, but this cell's import block only
# brings in numpy and pandas; import it here so the cell runs on its own
# instead of raising NameError.
import joblib

# Train the model on the training set
model = LinearRegressionCustom()
model.fit(X_train.values, y_train)

# Save the trained model to a pickle file
model_filename_pkl = 'linear_regression_model.pkl'
joblib.dump(model, model_filename_pkl)

print(f'Model saved as {model_filename_pkl}')

# Make predictions on the training set
y_train_pred = model.predict(X_train.values)

# Evaluate the model on the training set
mse_train = calculate_mse(y_train, y_train_pred)
mae_train = calculate_mae(y_train, y_train_pred)
r2_train = calculate_r2(y_train, y_train_pred)
rmse_train = calculate_rmse(y_train, y_train_pred)

# Make predictions on the testing set
y_test_pred = model.predict(X_test.values)

# Evaluate the model on the testing set
mse_test = calculate_mse(y_test, y_test_pred)
mae_test = calculate_mae(y_test, y_test_pred)
r2_test = calculate_r2(y_test, y_test_pred)
rmse_test = calculate_rmse(y_test, y_test_pred)

# Display results for training set
print("Training Metrics:")
print(f'Mean Squared Error (MSE): {mse_train:.4f}')
print(f'Mean Absolute Error (MAE): {mae_train:.4f}')
print(f'R-squared Score (R2): {r2_train:.4f}')
print(f'Root Mean Squared Error (RMSE): {rmse_train:.4f}')

# Display results for testing set
print("\nTesting Metrics:")
print(f'Mean Squared Error (MSE): {mse_test:.4f}')
print(f'Mean Absolute Error (MAE): {mae_test:.4f}')
print(f'R-squared Score (R2): {r2_test:.4f}')
print(f'Root Mean Squared Error (RMSE): {rmse_test:.4f}')


# Evaluation Using the scikit-learn Library

In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
import numpy as np
import joblib

# Load the cleaned dataset
file_path = r'C:\Users\Documents\Data_Clean.csv'
df = pd.read_csv(file_path)

# Separate features (X) and target variable (y)
X = df.drop('Sales', axis=1)
y = df['Sales']

# Split the data into training and testing sets (70:30 ratio).
# test_size is the fraction held out for TESTING, so a 70:30 train/test split
# needs test_size=0.3 (0.7 would train on only 30% of the data).
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

# Keep the feature column names available
feature_names = X.columns

# Create a linear regression model
model = LinearRegression()

# Train the model on the training set
model.fit(X_train, y_train)

# Save the trained model to a pickle file
model_filename_pkl = 'linear_regression_model.pkl'
joblib.dump(model, model_filename_pkl)

print(f'Model saved as {model_filename_pkl}')

# Make predictions on the testing set
y_pred = model.predict(X_test)

# Evaluate the model on the held-out test data
mse = mean_squared_error(y_test, y_pred)
mae = mean_absolute_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)
rmse = np.sqrt(mse)

# Display the evaluation metrics
print(f'Mean Squared Error (MSE): {mse:.4f}')
print(f'Mean Absolute Error (MAE): {mae:.4f}')
print(f'R-squared Score (R2): {r2:.4f}')
print(f'Root Mean Squared Error (RMSE): {rmse:.4f}')


# Tkinter GUI

In [None]:
import tkinter as tk
from tkinter import ttk
import joblib
import numpy as np

# Load the trained linear regression model from the pickle file.
# NOTE(review): both training cells in this file save to this same filename,
# so whichever one ran last determines which model is loaded here. If it is
# the custom LinearRegressionCustom model, that class presumably has to be
# defined in the running session for unpickling to succeed — confirm.
model_filename_pkl = 'linear_regression_model.pkl'
model = joblib.load(model_filename_pkl)

def predict_sales(tv, radio, social_media, influencer_type):
    """Predict sales for one set of inputs and show the result in the GUI.

    The outcome (prediction or validation message) is written to the
    module-level ``result_label``; nothing is returned.

    Args:
        tv: TV value, as a number or numeric string.
        radio: Radio value, as a number or numeric string.
        social_media: Social-media value, as a number or numeric string.
        influencer_type: One of 'Mega', 'Macro', 'Nano' or 'Micro'.
    """
    # Numeric encoding of the influencer type expected by the model.
    influencer_mapping = {'Macro': 3, 'Mega': 4, 'Micro': 1, 'Nano': 2}

    # Clear previous result
    result_label.config(text='')

    # Convert input values to numeric types, reporting bad input to the user
    # instead of letting the exception escape into the Tk callback.
    try:
        numeric_inputs = [float(tv), float(radio), float(social_media)]
    except (TypeError, ValueError):
        result_label.config(
            text='Invalid input: TV, Radio and Social Media must be numbers.')
        return

    # Reject unknown influencer types rather than silently encoding them as 0,
    # a code that the mapping never produces for a valid type.
    influencer_value = influencer_mapping.get(influencer_type)
    if influencer_value is None:
        result_label.config(
            text=f'Invalid input: unknown influencer type {influencer_type!r}.')
        return

    # Single-row feature matrix: TV, Radio, Social Media, Influencer
    input_data = np.array([numeric_inputs + [influencer_value]])

    # Make predictions
    prediction = model.predict(input_data)

    # Display the new prediction
    result_label.config(text=f'Predicted Sales: {prediction[0]:.2f}')

# Tkinter GUI
root = tk.Tk()
root.title('Sales Prediction')

# Labels, one per input row
for row, caption in enumerate(('TV:', 'Radio:', 'Social Media:', 'Influencer Type:')):
    ttk.Label(root, text=caption).grid(row=row, column=0, padx=5, pady=5)

# Entry widgets for user input
tv_entry = ttk.Entry(root)
tv_entry.grid(row=0, column=1, padx=5, pady=5)
radio_entry = ttk.Entry(root)
radio_entry.grid(row=1, column=1, padx=5, pady=5)
social_media_entry = ttk.Entry(root)
social_media_entry.grid(row=2, column=1, padx=5, pady=5)

# Dropdown menu for Influencer Type.
# state='readonly' restricts the selection to the listed values so the user
# cannot type an influencer type the model encoding does not know.
influencer_type_var = tk.StringVar()
influencer_type_var.set('Macro')  # Default value
influencer_type_dropdown = ttk.Combobox(
    root,
    textvariable=influencer_type_var,
    values=['Macro', 'Mega', 'Micro', 'Nano'],
    state='readonly',
)
influencer_type_dropdown.grid(row=3, column=1, padx=5, pady=5)


def _on_predict_clicked():
    """Validate the entry fields and trigger a prediction.

    Empty or non-numeric entries would make float() raise inside the Tk
    callback, where the user never sees the error; show a message in the
    result label instead.
    """
    try:
        tv_value = float(tv_entry.get())
        radio_value = float(radio_entry.get())
        social_media_value = float(social_media_entry.get())
    except ValueError:
        result_label.config(
            text='Please enter numeric values for TV, Radio and Social Media.')
        return
    predict_sales(tv_value, radio_value, social_media_value, influencer_type_var.get())


# Button to trigger prediction
predict_button = ttk.Button(root, text='Predict', command=_on_predict_clicked)
predict_button.grid(row=4, column=0, columnspan=2, pady=10)

# Label to display the result
result_label = ttk.Label(root, text='Predicted Sales: -')
result_label.grid(row=5, column=0, columnspan=2, pady=10)

root.mainloop()
