In [1]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from google.colab import files

In [2]:
# Read the raw car dataset from the Colab working directory into `df`.
df = pd.read_csv(filepath_or_buffer='/content/car_price_performance.csv')

In [3]:
# Ordinal-encode brands by mean price: brands are sorted ascending by their
# average price, so the cheapest brand gets code 1 and the priciest the last code.
brand_avg_price = df.groupby('Brand')['Price'].mean().sort_values(ascending=True)
brand_mapping = dict(zip(brand_avg_price.index, range(1, len(brand_avg_price) + 1)))
df['Encoded_Brand'] = df['Brand'].map(brand_mapping)

# Show each distinct (brand, code) pair once, then display the updated frame.
print("\nEncoded Brand values:")
print(df.loc[:, ['Brand', 'Encoded_Brand']].drop_duplicates())
df


Encoded Brand values:
      Brand  Encoded_Brand
0   Brand C              6
1   Brand A              4
3   Brand E              2
4   Brand X              8
5   Brand W              1
6   Brand Y              3
9   Brand D              9
12  Brand B              5
13  Brand Z              7


Unnamed: 0,Brand,Price,Horsepower,Fuel Efficiency,Encoded_Brand
0,Brand C,91942,382,15.3,6
1,Brand A,70075,197,32.6,4
2,Brand C,84524,188,34.9,6
3,Brand E,68939,258,15.1,2
4,Brand X,66505,277,13.0,8
...,...,...,...,...,...
1995,Brand Z,34535,196,15.2,7
1996,Brand C,35215,377,25.3,6
1997,Brand E,72980,294,21.8,2
1998,Brand Z,55921,443,11.0,7


In [4]:
def min_max_scale(series):
    """Linearly rescale a numeric series onto the [0, 1] interval.

    Each value is mapped to ``(value - min) / (max - min)``, so the smallest
    value becomes 0.0 and the largest becomes 1.0.

    Args:
        series: A numeric pandas Series (or any object exposing ``.min()``,
            ``.max()``, ``.astype()`` and elementwise arithmetic).

    Returns:
        A float series of the same length and index as ``series``. When every
        value is identical (zero range) the result is 0.0 for each entry
        instead of the NaN that a 0/0 division would produce; missing values
        stay missing.
    """
    lowest = series.min()
    span = series.max() - lowest
    if span == 0:
        # Constant column: dividing by a zero range would yield NaN everywhere.
        # Map every value to the bottom of the interval, which is also what
        # scikit-learn's MinMaxScaler does for zero-range features.
        return (series - lowest).astype(float)
    return (series - lowest) / span


In [5]:
# Add a min-max scaled copy of the horsepower column.
df['Scaled_Horsepower'] = df['Horsepower'].pipe(min_max_scale)

In [6]:
# List the columns present before the next scaled feature is added.
print(df.columns)
# Add a min-max scaled copy of the fuel-efficiency column.
df['Scaled_Fuel_Efficiency'] = df['Fuel Efficiency'].pipe(min_max_scale)

Index(['Brand', 'Price', 'Horsepower', 'Fuel Efficiency', 'Encoded_Brand',
       'Scaled_Horsepower'],
      dtype='object')


In [7]:
# Add a min-max scaled copy of the ordinal brand code.
df['Scaled_Brand'] = df['Encoded_Brand'].pipe(min_max_scale)

In [8]:
# Derived feature: elementwise product of horsepower and fuel efficiency
# (computed from the raw columns, not the scaled ones).
df['Performance_Index'] = df['Horsepower'].mul(df['Fuel Efficiency'])
print(df.loc[:, 'Performance_Index'])
df

0       5844.6
1       6422.2
2       6561.2
3       3895.8
4       3601.0
         ...  
1995    2979.2
1996    9538.1
1997    6409.2
1998    4873.0
1999    2790.0
Name: Performance_Index, Length: 2000, dtype: float64


Unnamed: 0,Brand,Price,Horsepower,Fuel Efficiency,Encoded_Brand,Scaled_Horsepower,Scaled_Fuel_Efficiency,Scaled_Brand,Performance_Index
0,Brand C,91942,382,15.3,6,0.7050,0.212,0.625,5844.6
1,Brand A,70075,197,32.6,4,0.2425,0.904,0.375,6422.2
2,Brand C,84524,188,34.9,6,0.2200,0.996,0.625,6561.2
3,Brand E,68939,258,15.1,2,0.3950,0.204,0.125,3895.8
4,Brand X,66505,277,13.0,8,0.4425,0.120,0.875,3601.0
...,...,...,...,...,...,...,...,...,...
1995,Brand Z,34535,196,15.2,7,0.2400,0.208,0.750,2979.2
1996,Brand C,35215,377,25.3,6,0.6925,0.612,0.625,9538.1
1997,Brand E,72980,294,21.8,2,0.4850,0.472,0.125,6409.2
1998,Brand Z,55921,443,11.0,7,0.8575,0.040,0.750,4873.0


In [9]:
# Columns to export, in output order: the raw fields first, then the
# engineered features added by the earlier cells.
export_columns = [
    'Brand',
    'Price',
    'Horsepower',
    'Fuel Efficiency',
    'Encoded_Brand',
    'Scaled_Horsepower',
    'Scaled_Fuel_Efficiency',
    'Scaled_Brand',
    'Performance_Index',
]
final_df = df[export_columns]
final_df.head(5)

Unnamed: 0,Brand,Price,Horsepower,Fuel Efficiency,Encoded_Brand,Scaled_Horsepower,Scaled_Fuel_Efficiency,Scaled_Brand,Performance_Index
0,Brand C,91942,382,15.3,6,0.705,0.212,0.625,5844.6
1,Brand A,70075,197,32.6,4,0.2425,0.904,0.375,6422.2
2,Brand C,84524,188,34.9,6,0.22,0.996,0.625,6561.2
3,Brand E,68939,258,15.1,2,0.395,0.204,0.125,3895.8
4,Brand X,66505,277,13.0,8,0.4425,0.12,0.875,3601.0


In [10]:
# Write the selected columns to a CSV (without the row index), then hand the
# same file to google.colab's files.download.
output_name = 'scaled_car_data.csv'
final_df.to_csv(output_name, index=False)
files.download(output_name)

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>