### 1. Install Dependencies

In [None]:
!pip install pandas scikit-learn -qq

### 2. Import Dependencies

In [None]:
# Read the csv files
import pandas as pd
# Import dependencies for generating our model
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.preprocessing import LabelEncoder
from sklearn.linear_model import LinearRegression
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_absolute_error, make_scorer

In [None]:
# Load the mobile price dataset from the CSV file into a DataFrame.
data_file = 'mobile_prices_2023.csv'
df = pd.read_csv(data_file)

In [None]:
# Count the missing values in each column.
df.isnull().sum()

In [None]:
# dropna() returns a new DataFrame and leaves the original untouched, so the
# result must be assigned back for the rows with missing values to be removed.
df = df.dropna()
# Show the remaining (rows, columns) instead of dumping the whole frame.
df.shape

In [None]:
# Columns of the CSV that get label-encoded. The name deliberately avoids
# `list`, which would shadow the Python built-in for the rest of the notebook.
columns_to_encode = ["Phone Name","Rating ?/5" ,"Number of Ratings","RAM","ROM/Storage","Back/Rare Camera","Front Camera" ,"Battery" ,"Processor" ,"Price in INR","Date of Scraping"]

# One fitted encoder per column, keyed by column name, so each column's
# value -> code mapping stays available (e.g. for transform/inverse_transform).
label_encoders = {}

# NOTE(review): 'Price in INR' is encoded here and later used as the target,
# so the models are trained on integer codes rather than on the original price
# values -- confirm this is intended.
for column in columns_to_encode:
    le = LabelEncoder()  # fresh encoder: refitting a shared one discards the previous mapping
    df[column] = le.fit_transform(df[column])
    label_encoders[column] = le

### 3. Feature Selection

In [None]:
# Columns used as model inputs.
feature_columns = [
    'Number of Ratings',
    'RAM',
    'ROM/Storage',
    'Back/Rare Camera',
    'Front Camera',
    'Battery',
    'Processor',
]
X = df[feature_columns]

# Column the models learn to predict.
target_column = 'Price in INR'
y = df[target_column]

### 4. Define Our Models

In [None]:
# Fixed seed so the tree-based models give the same results on every run.
RANDOM_STATE = 42

# Candidate regressors, keyed by the display name used when reporting scores.
models = {
    'Linear Regression': LinearRegression(),
    'Decision Tree Regression': DecisionTreeRegressor(random_state=RANDOM_STATE),
    'Random Forest Regression': RandomForestRegressor(random_state=RANDOM_STATE)
}

### 5. Train and Evaluate Models

In [None]:
# Tracking variables for model selection: the chosen estimator and its score.
best_model, best_accuracy = None, 0.0

# Scorer built on mean absolute error; greater_is_better=False makes it
# report the negated error.
scorer = make_scorer(score_func=mean_absolute_error, greater_is_better=False)

In [None]:
# Reset the selection state inside this cell so that re-running it never keeps
# a stale winner. `best_accuracy` holds the lowest mean MAE seen so far.
best_model = None
best_accuracy = float('inf')

for model_name, model in models.items():
    # 5-fold cross-validation of each candidate on the full feature matrix.
    cv_scores = cross_val_score(model, X, y, cv=5, scoring=scorer)
    # The scorer was created with greater_is_better=False, so the scores are
    # negated MAE values; flip the sign to get the error back.
    mean_mae = -cv_scores.mean()
    print(f'{model_name} - Mean Absolute Error: {mean_mae}')

    # MAE is an error measure: the best model is the one with the SMALLEST value.
    if mean_mae < best_accuracy:
        best_model = model
        best_accuracy = mean_mae

Linear Regression - Mean Absolute Error: 129.2812414961295
Decision Tree Regression - Mean Absolute Error: 126.0067163885457
Random Forest Regression - Mean Absolute Error: 102.92226558610673


### 6. Train the best model on the entire dataset (required before prediction)

In [None]:
# cross_val_score fits clones of each estimator, so best_model itself is still
# unfitted at this point; train it on all of the data before predicting.
best_model.fit(X=X, y=y)

### 7. Prediction

In [None]:
# One row of feature values, given in the same order as the columns of X.
# NOTE(review): the training features were label-encoded, so these values are
# interpreted as encoder codes, not raw specs -- confirm they are encoded the
# same way. The prediction is likewise on the encoded 'Price in INR' scale.
new_mobile_features = [[33561, 2, 32, 8, 5, 5000, 2.0]]  # Replace this with new mobile features

# The model was fitted on a DataFrame, so pass a DataFrame with the same column
# names; a bare list makes recent scikit-learn versions warn about missing
# feature names.
new_mobile_df = pd.DataFrame(new_mobile_features, columns=X.columns)

predicted_price = best_model.predict(new_mobile_df)
print('Best Model:', best_model.__class__.__name__)
print('Predicted Price:', predicted_price)

### 8. Save the Model

In [None]:
import joblib

In [None]:
# Write the selected model to disk with joblib.
model_path = 'best_model.pkl'
joblib.dump(best_model, model_path)