In [None]:
import pandas as pd
import numpy as np
from matplotlib import pyplot as plt
from sklearn.preprocessing import MinMaxScaler
import seaborn as sb
import plotly.express as px

In [None]:
# Read the CSV into the working frame and preview its first five rows.
df = pd.read_csv(filepath_or_buffer='car_prices.csv')
print(df.head(n=5))

In [None]:
# Summarise column dtypes and non-null counts. DataFrame.info() writes its
# report to stdout itself and returns None, so it is called bare: wrapping it
# in print() only appends a stray "None" line to the cell output.
df.info()

In [None]:
# Compute the descriptive statistics of the frame, then print them.
summary_stats = df.describe()
print(summary_stats)

In [None]:
# Tally the missing values in every column and print the tally.
# (isnull() is pandas' alias for isna().)
nan_counts = df.isnull().sum()
print(nan_counts)

In [None]:
# Frequency of each model value; dropna=False keeps missing values as their
# own entry in the counts.
df.loc[:, 'model'].value_counts(dropna=False)

In [None]:
# Number of distinct values in each column (missing values are not counted).
df.nunique(axis=0, dropna=True)

In [None]:
# List every distinct value found in the colour column, in order of appearance.
color_values = df['color'].unique()
print(color_values)

In [None]:
# Discard rows that exactly repeat an earlier row, keeping the first occurrence.
df = df.drop_duplicates(subset=None, keep='first')

In [None]:
# Keep only the rows whose colour is not '—'
# (presumably a placeholder for an unknown colour — confirm against the data).
df = df.loc[df['color'].ne('—')]

In [None]:
# Remove every row that has at least one missing value, then print the
# per-column missing counts again to confirm none remain.
df = df.dropna(axis=0, how='any')
print(df.isnull().sum())

In [None]:
# Display the dtype of each column of the current frame.
df.dtypes

In [None]:
# Min-max scale the selected columns (MinMaxScaler's default output range
# is [0, 1] per feature).
df1 = df[['year', 'condition', 'odometer', 'mmr', 'sellingprice']]
scaler = MinMaxScaler()

# Fit and transform in a single pass; a separate fit() followed by
# fit_transform() would fit the scaler twice on the same data.
scaled_data = scaler.fit_transform(df1)
print(scaler.data_max_)  # per-column maxima seen during fitting
print(scaled_data)

# Carry over df1's index so each scaled row keeps the label of the row it came
# from. Without it the new frame gets a fresh 0..n-1 index, which no longer
# lines up with `df` once rows have been dropped from it.
df2 = pd.DataFrame(scaled_data, columns=df1.columns, index=df1.index)

print(df2)


# DATA VISUALISATION (lab 2)

In [None]:
# Bar chart of the number of rows per colour.
df['color'].value_counts().plot(kind='bar')

In [None]:
# Bar chart of the 20 most frequent makes. value_counts() sorts by descending
# frequency, so the first 20 entries are the top 20.
counts = df['make'].value_counts()
top_counts = counts.iloc[:20]
top_counts.plot(kind='bar')

In [None]:
# Colour frequencies for Ford rows.
df_ford = df.loc[df['make'].eq('Ford')]
ford_colors = df_ford['color'].value_counts()

# Colour frequencies for Chevrolet rows.
df_chev = df.loc[df['make'].eq('Chevrolet')]
chev_colors = df_chev['color'].value_counts()

# Put both counts side by side (one column per make, aligned on colour) and
# draw them as a grouped bar chart.
colors_comparison = pd.DataFrame({'Ford': ford_colors, 'Chevrolet': chev_colors})
colors_comparison.plot(kind='bar')

In [None]:
# Restrict the data to three makes and compute the mean selling price for each
# (make, year) pair.
selected_makes = ['Chevrolet', 'Lexus', 'Mazda']
df_sb = df[['year', 'make', 'sellingprice']]
df_sb = df_sb.loc[df_sb['make'].isin(selected_makes)]
df_avg_price = (
    df_sb
    .groupby(['make', 'year'])['sellingprice']
    .agg('mean')
    .reset_index()
)
df_avg_price

In [None]:
# Point plot of the average selling price per year, one dashed line per make.
sb.set_style("whitegrid")
plt.figure(figsize=(12, 8))

sb.pointplot(
    x='year',
    y='sellingprice',
    hue='make',
    data=df_avg_price,
    markers='o',
    linestyles='--',
    dodge=False,
)

# Title and axis labels are applied to the axes seaborn just drew on.
plt.gca().set(
    title='Point Plot for Sales Prices by Year and Brand',
    xlabel='Year',
    ylabel='Selling Price',
)
plt.xticks(rotation=45)

# Anchor the legend outside the plotting area, to the right of the axes.
plt.legend(loc='upper left', bbox_to_anchor=(1.05, 1), title='Make')
plt.tight_layout()
plt.show()

## Pie chart

In [None]:
# Count rows per transmission type once and take both the slice sizes and the
# slice labels from that same Series. value_counts() orders by frequency while
# unique() orders by first appearance, so pairing the two separately can
# attach the wrong label to a slice.
transmission_counts = df['transmission'].value_counts()

pie_chart = px.pie(values=transmission_counts.tolist(),
                   names=transmission_counts.index.tolist(),
                   color_discrete_sequence=px.colors.sequential.RdBu,
                   title='Transmission type')

# Show percentage, label and raw count inside each slice, with white borders
# separating the slices.
pie_chart.update_traces(textposition='inside',
                        textinfo='percent+label+value',
                        marker=dict(line=dict(color='#FFFFFF', width=2)),
                        textfont_size=12)

pie_chart.show()
