In [None]:
# Import necessary libraries
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error

# Read the source CSV into a DataFrame.
file_path = '/content/Cleaned_file10.csv'  # Point this at your own copy of the file
data = pd.read_csv(filepath_or_buffer=file_path)

# Clean and preprocess the dataset
# Blank out the placeholder strings that show up where a value was expected.
placeholder_labels = ['Contact seller', "Seller's other items"]
data.replace(placeholder_labels, float('nan'), inplace=True)

# Force every column to a numeric dtype. errors='coerce' turns any value that
# cannot be parsed as a number into NaN instead of raising.
data = data.apply(pd.to_numeric, errors='coerce')

# Keep only the rows that have a value in each critical column.
critical_columns = ['Base Price', 'Market Price', 'Stock', 'Rating', 'Pieces sold']
data.dropna(subset=critical_columns, inplace=True)

# Columns the regression consumes, and the column it learns to predict.
features = ['Base Price', 'Stock', 'No_of_Reviewers', 'Rating', 'Pieces sold',
            'Gender', 'Feature_Frequency', 'Material_Frequency', 'Color_Frequency']
target = 'Market Price'

# Drop rows that are missing ANY model input or the target. The earlier
# critical-column filter does not cover every feature listed above, and the
# numeric coercion step can introduce NaN in those columns; LinearRegression
# rejects NaN at both fit and predict time, so remove such rows here.
data.dropna(subset=features + [target], inplace=True)

X = data[features]
y = data[target]

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split
# reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Standardize the features. The scaler is fitted on the training split only
# and then applied unchanged to the test split.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Fit a linear regression on the standardized training data.
model = LinearRegression().fit(X_train_scaled, y_train)

# Score the fitted model on the held-out split.
y_pred = model.predict(X_test_scaled)
mse = mean_squared_error(y_true=y_test, y_pred=y_pred)
print(f"Mean Squared Error: {mse}")

# Report the fitted parameters. The model was trained on standardized inputs,
# so each coefficient is expressed per standard deviation of its feature.
coefficients_by_feature = {name: weight for name, weight in zip(features, model.coef_)}
print("Model Intercept (Bias):", model.intercept_)
print("Feature Coefficients:", coefficients_by_feature)

# Price optimization: keep each price between 40% and 60% above its base price.
# The model's prediction is the starting point and clipping pulls it into the
# allowed markup band. (Starting from exactly 1.4 x base and then clipping to
# [1.4 x base, 1.6 x base] can never change a value, so the result would always
# sit on the lower bound and never reflect the model.)
# NOTE(review): assumes 'Base Price' is non-negative, otherwise the lower bound
# would exceed the upper bound - confirm against the data.
MIN_MARKUP = 1.4  # 40% above base price
MAX_MARKUP = 1.6  # 60% above base price

# Predict through the same fitted scaler used for training so the inputs are on
# the scale the model was trained on. Wrapping the result in a Series that
# carries the frame's index lets the clip bounds line up row by row.
predicted_prices = pd.Series(
    model.predict(scaler.transform(data[features])),
    index=data.index,
)

data['Optimized Price'] = predicted_prices.clip(
    lower=data['Base Price'] * MIN_MARKUP,
    upper=data['Base Price'] * MAX_MARKUP,
)

# Keep the raw prediction alongside the optimized price for comparison.
data['Predicted Price'] = predicted_prices

# Display sample results
print(data[['Base Price', 'Market Price', 'Optimized Price', 'Predicted Price']].head())


Mean Squared Error: 5.465639661450036e-24
Model Intercept (Bias): 2940.558345642541
Feature Coefficients: {'Base Price': 3306.031535192916, 'Stock': -3.607928489081566e-13, 'No_of_Reviewers': 1.602276938590013e-12, 'Rating': -1.0260570165468926e-12, 'Pieces sold': -7.386158338267468e-14, 'Gender': 8.238362097658696e-13, 'Feature_Frequency': 6.905196327119609e-13, 'Material_Frequency': -8.202981054597167e-13, 'Color_Frequency': -2.7227316505836383e-13}
   Base Price  Market Price  Optimized Price  Predicted Price
0    595.1088       1525.92        833.15232          1525.92
1   1901.6712       4876.08       2662.33968          4876.08
2   1029.0852       2638.68       1440.71928          2638.68
3   5147.4852      13198.68       7206.47928         13198.68
4    647.6184       1660.56        906.66576          1660.56
