In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import statsmodels as sm
import seaborn as sns 
import plotly.express as px
%matplotlib inline
import warnings
warnings.filterwarnings('ignore')

In [None]:
# Read the Nike product listing CSV into the `nike` DataFrame used by every later cell.
nike_csv_path = r'../input/nike-fashion-products-dataset/nike_data_2022_09.csv'
nike = pd.read_csv(nike_csv_path)

In [None]:
# Number of distinct values in each column.
nike.nunique(axis=0)

In [None]:
# Drop the columns that none of the later cells in this notebook read.
# Reassigning (instead of inplace=True) together with errors='ignore' makes this
# cell safe to re-run: a second run no longer raises KeyError for columns that
# were already removed.
unused_columns = ['url','brand','model','currency','description','raw_description','images','uniq_id']
nike = nike.drop(columns=unused_columns, errors='ignore')

In [None]:
# Preview the first five rows after dropping the unused columns.
nike.head(5)

In [None]:
# Count the missing values in each column.
nike.isna().sum()

In [None]:
# Show the dtype pandas assigned to each remaining column.
nike.dtypes

In [None]:
# Summary statistics with describe()'s default column selection, before any null handling.
nike.describe()

In [None]:
# Summary (count / unique / top / freq) of the object-dtype columns, before null handling.
nike.describe(include=['object'])

In [None]:
# Box plot of avg_rating to eyeball outliers.
# The series is passed by keyword: newer seaborn releases interpret the first
# positional argument as `data` rather than `x`, so relying on position makes
# the plot depend on the installed seaborn version.
sns.boxplot(x=nike['avg_rating']);

In [None]:
# First and third quartiles of avg_rating (inputs to the IQR outlier rule below).
Q1, Q3 = nike['avg_rating'].quantile([0.25, 0.75])
Q1, Q3

In [None]:
# Interquartile range of avg_rating.
IQR = Q3 - Q1
IQR

In [None]:
# Lower / upper fences of the 1.5 * IQR rule for avg_rating.
ll, ul = Q1 - 1.5 * IQR, Q3 + 1.5 * IQR
ll, ul

In [None]:
# Rows whose avg_rating falls outside the fences (rows with a missing rating are not selected).
rating = nike['avg_rating']
nike[rating.lt(ll) | rating.gt(ul)]

In [None]:
# Box plot of review_count to eyeball outliers.
# The series is passed by keyword: newer seaborn releases interpret the first
# positional argument as `data` rather than `x`, so relying on position makes
# the plot depend on the installed seaborn version.
sns.boxplot(x=nike['review_count']);

In [None]:
# First and third quartiles of review_count (overwrites the avg_rating values of Q1 / Q3).
Q1, Q3 = nike['review_count'].quantile([0.25, 0.75])
Q1, Q3

In [None]:
# Interquartile range of review_count.
IQR = Q3 - Q1
IQR

In [None]:
# Lower / upper fences of the 1.5 * IQR rule for review_count.
ll, ul = Q1 - 1.5 * IQR, Q3 + 1.5 * IQR
ll, ul

In [None]:
# Rows whose review_count falls outside the fences (rows with a missing count are not selected).
reviews = nike['review_count']
nike[reviews.lt(ll) | reviews.gt(ul)]

**Handling Null Values**

In [None]:
# Replace missing values in each numeric column with that column's own median.
for numeric_col in ('avg_rating', 'review_count'):
    nike[numeric_col] = nike[numeric_col].fillna(nike[numeric_col].median())

In [None]:
# Both avg_rating and review_count contain outliers, so their null values are filled with the median, which is less affected by extreme values than the mean.

In [None]:
# Re-check the summary statistics now that avg_rating and review_count have been filled.
nike.describe()

In [None]:
# Replace missing values in each categorical column with that column's most frequent value.
for categorical_col in ('color', 'availability', 'available_sizes'):
    most_frequent = nike[categorical_col].mode()[0]
    nike[categorical_col] = nike[categorical_col].fillna(most_frequent)

In [None]:
# All three columns are categorical (object dtype), so their null values are filled with the mode, i.e. the most frequent value.

In [None]:
# Re-check the object-dtype columns now that their nulls have been filled.
nike.describe(include=['object'])

**Correlation**

In [None]:
# Pairwise correlation of the numeric columns, shown as an annotated heatmap
# and then as a table.
# Non-numeric columns are filtered out explicitly: the frame still holds
# object columns (e.g. name, color), and recent pandas versions raise on
# DataFrame.corr() instead of silently skipping them.
# The matrix is computed once and reused for both the plot and the table.
numeric_corr = nike.select_dtypes(include='number').corr()

plt.figure(figsize=(7,7))
sns.heatmap(numeric_corr, annot=True)
plt.show()
numeric_corr

In [None]:
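# The correlation chart shows negative coefficients between the variables. Note that a negative coefficient indicates an inverse relationship, not the absence of one; only coefficients close to zero indicate little or no linear correlation.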

**Visualization**

In [None]:
# Distribution of price with a vertical marker at the mean.
# sns.distplot is deprecated; histplot with kde=True and stat='density'
# draws the equivalent density-normalised histogram plus KDE curve.
plt.axvline(nike['price'].mean(), 0, 1, color='yellow')
sns.histplot(x=nike['price'], color='green', bins=5, kde=True, stat='density');

In [None]:
# Distribution of avg_rating with a vertical marker at the mean.
# sns.distplot is deprecated; histplot with kde=True and stat='density'
# draws the equivalent density-normalised histogram plus KDE curve.
plt.axvline(nike['avg_rating'].mean(), 0, 1, color='yellow')
sns.histplot(x=nike['avg_rating'], color='green', bins=5, kde=True, stat='density');

In [None]:
# Interactive scatter of price against avg_rating (log-scaled x axis);
# marker size and colour both encode review_count, hover shows the product name.
px.scatter(
    nike,
    x="avg_rating",
    y="price",
    size="review_count",
    color="review_count",
    hover_name="name",
    log_x=True,
    size_max=100,
)

In [None]:
# Number of products per availability status.
# The series is passed by keyword: newer seaborn releases interpret the first
# positional argument as `data` rather than `x`, so relying on position makes
# the plot depend on the installed seaborn version.
sns.countplot(x=nike['availability'], palette='husl');

In [None]:
# Bar chart of the five most frequent product names.
name = nike['name'].value_counts().head(5)

plt.figure(figsize=(20,7))
ax = sns.barplot(x=list(name.index), y=list(name.values), palette='Blues')
# The caption lives on the figure instead of being printed from a `finally`
# block, which showed it even when the plot itself had failed.
ax.set_title('Top 5 Nike Names')

# Only the value labels are optional: skip them when this matplotlib has no
# Axes.bar_label, instead of catching AttributeError around the whole cell
# (which also hid unrelated errors behind a generic message).
if hasattr(ax, 'bar_label'):
    for container in ax.containers:
        ax.bar_label(container)
plt.show()

In [None]:
# Bar chart of the five most frequent colors.
color = nike['color'].value_counts().head(5)

plt.figure(figsize=(20,7))
ax = sns.barplot(x=list(color.index), y=list(color.values), palette='Greens')
# The caption lives on the figure instead of being printed from a `finally`
# block, which showed it even when the plot itself had failed.
ax.set_title('Top 5 Nike Colors')

# Only the value labels are optional: skip them when this matplotlib has no
# Axes.bar_label, instead of catching AttributeError around the whole cell
# (which also hid unrelated errors behind a generic message).
if hasattr(ax, 'bar_label'):
    for container in ax.containers:
        ax.bar_label(container)
plt.show()

In [None]:
# Bar chart of the five most frequent available_sizes values.
available = nike['available_sizes'].value_counts().head(5)

plt.figure(figsize=(20,7))
ax = sns.barplot(x=list(available.index), y=list(available.values), palette='Reds')
# The caption lives on the figure instead of being printed from a `finally`
# block, which showed it even when the plot itself had failed.
ax.set_title('Top 5 Available Sizes')

# Only the value labels are optional: skip them when this matplotlib has no
# Axes.bar_label, instead of catching AttributeError around the whole cell
# (which also hid unrelated errors behind a generic message).
if hasattr(ax, 'bar_label'):
    for container in ax.containers:
        ax.bar_label(container)
plt.show()

**Analysis**

> The most common price was around 40-50 USD

> The average rating of Nike products was 4.5

> Products with an average rating of 4.8 frequently have a review count of 5, across various price ranges

> The maximum review count at an average rating of 4.8 is 120, for the Air Jordan 1 Zoom CMFT priced at 150 USD

> Nike Essentials, priced at around 12 USD, has an average rating of 4.7 and the highest review count, 316

> The top 5 purchases from Nike were led by Nike Sportswear (the highest), followed by Nike, Nike Dri-FIT, Jordan and Club America

> The top 5 colors purchased were led by Black (the highest), followed by White, Navy, Black/White and Multicolor

> The most commonly available size range is S|M|L|XL|2XL