In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import io

In [2]:
data = pd.read_csv('Mobile-Price-Prediction-cleaned_data.csv')
data.shape

(807, 8)

In [3]:
data.head()

Unnamed: 0,Ratings,RAM,ROM,Mobile_Size,Primary_Cam,Selfi_Cam,Battery_Power,Price
0,4.3,4.0,128.0,6.0,48,13.0,4000,24999
1,3.4,6.0,64.0,4.5,48,12.0,4000,15999
2,4.3,4.0,4.0,4.5,64,16.0,4000,15000
3,4.4,6.0,64.0,6.4,48,15.0,3800,18999
4,4.5,6.0,128.0,6.18,35,15.0,3800,18999


In [4]:
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 807 entries, 0 to 806
Data columns (total 8 columns):
 #   Column         Non-Null Count  Dtype  
---  ------         --------------  -----  
 0   Ratings        807 non-null    float64
 1   RAM            807 non-null    float64
 2   ROM            807 non-null    float64
 3   Mobile_Size    807 non-null    float64
 4   Primary_Cam    807 non-null    int64  
 5   Selfi_Cam      807 non-null    float64
 6   Battery_Power  807 non-null    int64  
 7   Price          807 non-null    int64  
dtypes: float64(5), int64(3)
memory usage: 50.6 KB


In [5]:
data.columns.tolist()

['Ratings',
 'RAM',
 'ROM',
 'Mobile_Size',
 'Primary_Cam',
 'Selfi_Cam',
 'Battery_Power',
 'Price']

In [6]:
rows, cols = data.shape
print(f"Rows: {rows}")
print(f"Columns: {cols}")

Rows: 807
Columns: 8


In [7]:
data.describe()

Unnamed: 0,Ratings,RAM,ROM,Mobile_Size,Primary_Cam,Selfi_Cam,Battery_Power,Price
count,807.0,807.0,807.0,807.0,807.0,807.0,807.0,807.0
mean,4.112639,5.94176,64.390335,5.62066,47.821561,8.868649,3278.859975,14269.167286
std,0.36783,2.056359,53.868626,3.957409,11.155916,4.547254,937.628801,23092.739983
min,2.8,0.0,2.0,2.0,5.0,0.0,1020.0,479.0
25%,3.9,6.0,32.0,4.5,48.0,8.0,3000.0,984.0
50%,4.1,6.0,32.0,4.77,48.0,8.0,3000.0,1699.0
75%,4.4,6.0,64.0,6.3,48.0,12.0,3800.0,18994.5
max,4.8,12.0,256.0,44.0,64.0,23.0,6000.0,153000.0


In [8]:
null_values = data.isnull().sum()
print(null_values)
print("\nTotal:", null_values.sum())

Ratings          0
RAM              0
ROM              0
Mobile_Size      0
Primary_Cam      0
Selfi_Cam        0
Battery_Power    0
Price            0
dtype: int64

Total: 0


In [9]:
data.dtypes

Ratings          float64
RAM              float64
ROM              float64
Mobile_Size      float64
Primary_Cam        int64
Selfi_Cam        float64
Battery_Power      int64
Price              int64
dtype: object

In [10]:
# Robustly handle missing/renamed columns (use alternatives or show available columns)
col_candidates = ['make', 'brand', 'manufacturer', 'maker']
col_map = {c.lower().strip(): c for c in data.columns}
found = None
for cand in col_candidates:
    if cand in col_map:
        found = col_map[cand]
        break
if found is None:
    print("Column 'make' not found. Available columns:")
    print(data.columns.tolist())
else:
    print(data[found].unique())
    print("\nUnique makes:", data[found].nunique())
    print("\n" + "="*50)
    print("\nFuel types:")
    if 'fuel_type' in col_map:
        print(data[col_map['fuel_type']].unique())
    elif 'fuel' in col_map:
        print(data[col_map['fuel']].unique())
    else:
        print("No 'fuel_type' column found.")
    print("\n" + "="*50)
    print("\nTransmission types:")
    if 'transmission' in col_map:
        print(data[col_map['transmission']].unique())
    else:
        print("No 'transmission' column found.")

Column 'make' not found. Available columns:
['Ratings', 'RAM', 'ROM', 'Mobile_Size', 'Primary_Cam', 'Selfi_Cam', 'Battery_Power', 'Price']


In [11]:
data.sample(10)

Unnamed: 0,Ratings,RAM,ROM,Mobile_Size,Primary_Cam,Selfi_Cam,Battery_Power,Price
195,4.1,6.0,32.0,4.5,48,15.0,3000,1099
737,4.1,8.0,64.0,4.54,64,8.0,2500,1199
51,3.8,6.0,32.0,4.54,48,8.0,2800,1299
515,4.5,6.0,32.0,4.5,64,15.0,3000,649
392,3.9,6.0,32.0,4.5,64,12.0,2000,749
151,3.8,6.0,32.0,44.0,48,8.0,3000,890
343,3.6,6.0,64.0,4.5,48,8.0,3500,629
764,4.5,6.0,128.0,6.39,48,13.0,4030,24999
528,3.5,6.0,32.0,4.5,64,8.0,1050,799
387,4.1,1.0,16.0,5.0,5,2.0,2500,5049


In [12]:
data.tail()

Unnamed: 0,Ratings,RAM,ROM,Mobile_Size,Primary_Cam,Selfi_Cam,Battery_Power,Price
802,3.8,6.0,32.0,4.54,48,12.0,2800,1299
803,4.1,8.0,64.0,4.54,64,8.0,2500,1390
804,4.4,3.0,32.0,6.2,48,1.0,3800,9790
805,3.7,10.0,32.0,4.5,64,8.0,3500,799
806,3.5,6.0,32.0,4.5,64,15.0,1050,799


In [13]:
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 807 entries, 0 to 806
Data columns (total 8 columns):
 #   Column         Non-Null Count  Dtype  
---  ------         --------------  -----  
 0   Ratings        807 non-null    float64
 1   RAM            807 non-null    float64
 2   ROM            807 non-null    float64
 3   Mobile_Size    807 non-null    float64
 4   Primary_Cam    807 non-null    int64  
 5   Selfi_Cam      807 non-null    float64
 6   Battery_Power  807 non-null    int64  
 7   Price          807 non-null    int64  
dtypes: float64(5), int64(3)
memory usage: 50.6 KB


In [14]:
data.columns.tolist()

['Ratings',
 'RAM',
 'ROM',
 'Mobile_Size',
 'Primary_Cam',
 'Selfi_Cam',
 'Battery_Power',
 'Price']

In [15]:
rows, cols = data.shape
print(f"Rows: {rows}")
print(f"Columns: {cols}")

Rows: 807
Columns: 8


In [16]:
data.describe()

Unnamed: 0,Ratings,RAM,ROM,Mobile_Size,Primary_Cam,Selfi_Cam,Battery_Power,Price
count,807.0,807.0,807.0,807.0,807.0,807.0,807.0,807.0
mean,4.112639,5.94176,64.390335,5.62066,47.821561,8.868649,3278.859975,14269.167286
std,0.36783,2.056359,53.868626,3.957409,11.155916,4.547254,937.628801,23092.739983
min,2.8,0.0,2.0,2.0,5.0,0.0,1020.0,479.0
25%,3.9,6.0,32.0,4.5,48.0,8.0,3000.0,984.0
50%,4.1,6.0,32.0,4.77,48.0,8.0,3000.0,1699.0
75%,4.4,6.0,64.0,6.3,48.0,12.0,3800.0,18994.5
max,4.8,12.0,256.0,44.0,64.0,23.0,6000.0,153000.0


In [17]:
null_values = data.isnull().sum()
print(null_values)
print("\nTotal:", null_values.sum())

Ratings          0
RAM              0
ROM              0
Mobile_Size      0
Primary_Cam      0
Selfi_Cam        0
Battery_Power    0
Price            0
dtype: int64

Total: 0


In [18]:
data.dtypes

Ratings          float64
RAM              float64
ROM              float64
Mobile_Size      float64
Primary_Cam        int64
Selfi_Cam        float64
Battery_Power      int64
Price              int64
dtype: object

In [19]:
data.sample(10)

Unnamed: 0,Ratings,RAM,ROM,Mobile_Size,Primary_Cam,Selfi_Cam,Battery_Power,Price
621,4.2,6.0,32.0,4.54,48,15.0,2500,1259
114,3.7,4.0,40.0,4.58,48,8.0,3800,1799
788,4.5,6.0,128.0,6.39,48,13.0,3800,24999
400,3.8,6.0,32.0,4.77,48,15.0,3000,959
660,4.4,4.0,128.0,6.53,48,2.0,3500,16490
627,4.1,6.0,32.0,4.54,48,20.0,3500,3399
79,3.9,6.0,64.0,4.54,48,8.0,3600,1799
17,4.5,8.0,128.0,6.7,64,1.0,4700,77999
685,4.4,8.0,128.0,6.7,48,8.0,3000,40900
521,3.4,4.0,64.0,5.5,48,5.0,3000,13999


In [20]:
print("Checking for missing values...")
missing_data = data.isnull().sum()
print("Missing values per column:")
print(missing_data)
print("\nTotal missing values:", missing_data.sum())

Checking for missing values...
Missing values per column:
Ratings          0
RAM              0
ROM              0
Mobile_Size      0
Primary_Cam      0
Selfi_Cam        0
Battery_Power    0
Price            0
dtype: int64

Total missing values: 0


In [21]:
print(f"Shape before: {data.shape}")
data = data.dropna()
print(f"Shape after: {data.shape}")

Shape before: (807, 8)
Shape after: (807, 8)


In [22]:
float_cols = data.select_dtypes(include=['float64']).columns
print("Columns with float datatype:")
print(float_cols.tolist())

Columns with float datatype:
['Ratings', 'RAM', 'ROM', 'Mobile_Size', 'Selfi_Cam']


In [23]:
for col in float_cols:
    data[col] = data[col].astype(int)

print("Conversion completed")
print(data.dtypes)

Conversion completed
Ratings          int64
RAM              int64
ROM              int64
Mobile_Size      int64
Primary_Cam      int64
Selfi_Cam        int64
Battery_Power    int64
Price            int64
dtype: object


In [24]:
print("Final data shape:", data.shape)
print("\nData types after preprocessing:")
print(data.dtypes)
print("\nNull values after preprocessing:")
print(data.isnull().sum().sum())

Final data shape: (807, 8)

Data types after preprocessing:
Ratings          int64
RAM              int64
ROM              int64
Mobile_Size      int64
Primary_Cam      int64
Selfi_Cam        int64
Battery_Power    int64
Price            int64
dtype: object

Null values after preprocessing:
0
