# Linear Regression Analysis

In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import r2_score, mean_squared_error, mean_absolute_error

# Load the dataset (semicolon-delimited; '.' is treated as a thousands separator
# for columns that pandas can parse as fully numeric)
data = pd.read_csv('/mnt/data/Enriched_Clean_Data.csv', delimiter=';', thousands='.')

# Drop rows with missing values
data_cleaned = data.dropna()

# Parse the 'Sales' column into numbers.
# The '.' thousands separators are stripped BEFORE the numeric check: checking
# first would read "1.234" as 1.234 and reject "1.234.567" outright, silently
# dropping every row with more than one separator.
# Only string entries are rewritten, so a column that read_csv already parsed
# as numeric passes through unchanged instead of failing on the .str accessor.
sales_numeric = pd.to_numeric(
    data_cleaned['Sales'].map(
        lambda value: value.replace('.', '') if isinstance(value, str) else value
    ),
    errors='coerce',
)

# Keep only rows whose 'Sales' value could be parsed; take an explicit copy so
# the column assignment below does not write into a view of the filtered frame.
valid_sales = sales_numeric.notna()
data_cleaned = data_cleaned[valid_sales].copy()

# Store the parsed 'Sales' values as float
data_cleaned['Sales'] = sales_numeric[valid_sales].astype(float)

# Pick the three predictor columns (X) and the target column (y) from the cleaned data
X = data_cleaned.loc[
    :,
    ['shipping_costs', 'number_of_products_in_warehouse', 'production_costs_per_unit'],
]
y = data_cleaned.loc[:, 'Sales']

# Hold out 30% of the rows for testing; the fixed seed keeps the split reproducible
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=42,
)

# Fit an ordinary least-squares linear regression on the training portion
# (fit() returns the estimator itself, so construction and fitting can be chained)
model = LinearRegression().fit(X_train, y_train)

# Predict the target for the held-out rows
y_pred = model.predict(X_test)

# Score the predictions: R-squared, Mean Squared Error, and Mean Absolute Error
r2, mse, mae = (
    metric(y_test, y_pred)
    for metric in (r2_score, mean_squared_error, mean_absolute_error)
)

# Notebook cell output: display the three metrics
r2, mse, mae