![image](https://storage.googleapis.com/kaggle-datasets-images/3694126/6406287/a4d224d51d77b4e28b600d8b8c1a4ef1/dataset-cover.png?t=2023-09-03-01-21-44)

# <div style="color:darkgreen;display:inline-block;border-radius:5px;background-color:#FFD9E6;font-family:Nexa;overflow:hidden"><p style="padding:15px;color:darkgreen;overflow:hidden;font-size:100%;letter-spacing:0.5px;margin:0"><b> </b>Introduction</p></div>


This dataset compiles information on global fashion brands featured in Interbrand's Top 100 Global Brand list from 2001 to 2021. It includes data on brand name, country of origin, region of origin, industry sector, and sub-sector. For each year within this period, it provides numeric values such as brand ranking, brand equity in USD billion, and the percentage growth rate in brand equity.

# <div style="color:darkgreen;display:inline-block;border-radius:5px;background-color:#FFD9E6;font-family:Nexa;overflow:hidden"><p style="padding:15px;color:darkgreen;overflow:hidden;font-size:100%;letter-spacing:0.5px;margin:0"><b> </b> Import Modules</p></div>

In [None]:
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
%matplotlib inline

import warnings
warnings.filterwarnings('ignore')

import time

# Plot theme applied to every figure in the notebook: one background colour for
# both figure and axes, black edges/labels/ticks, and a light, semi-transparent grid.
_BACKGROUND = "#FFD9E6"
_INK = "#000000"

rc = {
    "axes.facecolor": _BACKGROUND,
    "figure.facecolor": _BACKGROUND,
    "axes.edgecolor": _INK,
    "grid.color": "#EBEBE7",
    "font.family": "serif",
    "axes.labelcolor": _INK,
    "xtick.color": _INK,
    "ytick.color": _INK,
    "grid.alpha": 0.4,
}

sns.set(rc=rc)

from colorama import Style, Fore
# Bright colour prefixes for console output, plus the reset code that ends a coloured span
red, blu, mgt, gld = (
    Style.BRIGHT + colour
    for colour in (Fore.RED, Fore.BLUE, Fore.MAGENTA, Fore.YELLOW)
)
res = Style.RESET_ALL

# <div style="color:darkgreen;display:inline-block;border-radius:5px;background-color:#FFD9E6;font-family:Nexa;overflow:hidden"><p style="padding:15px;color:darkgreen;overflow:hidden;font-size:100%;letter-spacing:0.5px;margin:0"><b> </b> Data Loading and Exploration</p></div>


### 🔘 Load the dataset using a suitable library like pandas.

In [None]:
# Location of the Excel workbook inside the Kaggle input directory
dataset_path = '/kaggle/input/global-fashion-brands/7fg8835b4g-1/Dataset Global Fashion Brands Brand Equity Ranking Growth Rate  COO ROO 2001-2021.xlsx'

# Load the workbook into the DataFrame used by every later cell
data = pd.read_excel(dataset_path)

### 🔘 Explore the first few rows of the dataset to understand its structure

In [None]:
# Preview the first rows, rendered with white text on blue cells
preview_cell_style = {
    'background-color': 'blue',
    'color': 'white',
    'border-color': '#8338EC',
}
data.head().style.set_properties(**preview_cell_style)

### 🔘 Check the size of the dataset (number of rows and columns)

In [None]:
# Report the dimensions of the loaded table
data_num_rows = len(data.index)
data_num_cols = len(data.columns)
print(f'Dataset \nNumber of rows : {data_num_rows}\nNumber of columns: {data_num_cols}')

### 🔘 Basic info

In [None]:
# Print the column names, non-null counts, dtypes and memory usage of the table
data.info()

In [None]:
# Show the column labels; later cells select columns by these exact names
data.columns

### 🔘 Count null/nan values

In [None]:
# Number of missing (NaN) entries in each column
data.isna().sum()

In [None]:
import missingno as msno

# Lay out missingno's four nullity views on a 2x2 grid of axes
fig, ax = plt.subplots(nrows=2, ncols=2, figsize=(12, 7))
axs = ax.ravel()
matrix_ax, bar_ax, heatmap_ax, dendrogram_ax = axs
bar_colour = (0.25, 0, 0.5)

msno.matrix(data, ax=matrix_ax, fontsize=9, color=bar_colour)
msno.bar(data, ax=bar_ax, fontsize=8, color=bar_colour)
msno.heatmap(data, ax=heatmap_ax, fontsize=8)
msno.dendrogram(data, ax=dendrogram_ax, fontsize=8, orientation='top')

# The trailing semicolon keeps the notebook from echoing the returned Text object
fig.suptitle('Missing Values Analysis', y=1.01, fontsize=15);

# <div style="color:darkgreen;display:inline-block;border-radius:5px;background-color:#FFD9E6;font-family:Nexa;overflow:hidden"><p style="padding:15px;color:darkgreen;overflow:hidden;font-size:100%;letter-spacing:0.5px;margin:0"><b> </b>Data Cleaning</p></div>


### 🔘 Handle missing values appropriately (e.g., replace with zeros or mean values)

In [None]:
# Per-column share of missing entries, expressed as a percentage of the row count
data.isna().sum().div(len(data)).mul(100)

In [None]:
# Work on a zero-filled copy; the original `data` keeps its NaNs
data_filled = data.fillna(value=0)

# Re-measure the per-column missing percentage on the filled copy
missing_percent = data_filled.isna().sum().div(len(data_filled)).mul(100)

print(
    "Percentage of missing values after replacing NaN with zeros:",
    missing_percent,
    sep="\n",
)

In [None]:
# Overall percentage of missing cells across the whole dataset.
null_values = data.isna().sum()            # missing count per column
total_missing_values = null_values.sum()   # missing count over all columns

# A percentage needs a denominator: the total number of cells (rows x columns).
# Guard against an empty frame so the division cannot hit 0/0.
total_cells = data.size
percentage_missing_values = (
    total_missing_values / total_cells * 100 if total_cells else 0.0
)
print(f'The data set contains {percentage_missing_values:.2f}% missing values')


### 🔘 Inspect for duplicate values and remove them if present.

In [None]:
# Count fully duplicated rows (every column equal to an earlier row)
duplicate = data.duplicated().sum()
print(f'There are {duplicate} duplicate rows in the data set; we will remove them.')

# Actually perform the removal the message announces; keeps the first occurrence.
# This is a no-op when there are no duplicates.
data.drop_duplicates(inplace=True)

# <div style="color:darkgreen;display:inline-block;border-radius:5px;background-color:#FFD9E6;font-family:Nexa;overflow:hidden"><p style="padding:15px;color:darkgreen;overflow:hidden;font-size:100%;letter-spacing:0.5px;margin:0"><b> </b>Data Preparation</p></div>


### 🔘 Generate statistics about the data using the `describe` function.

In [None]:
# Summary statistics of the numeric columns, colour-graded per column for readability
data.describe().style.background_gradient(cmap='tab20c')

In [None]:
# Summary of the object-dtype (text) columns: count, unique values, top value and its frequency
data.describe(include=['object'])

# <div style="color:darkgreen;display:inline-block;border-radius:5px;background-color:#FFD9E6;font-family:Nexa;overflow:hidden"><p style="padding:15px;color:darkgreen;overflow:hidden;font-size:100%;letter-spacing:0.5px;margin:0"><b> </b>Data Analysis and Visualization</p></div>


# <div style="color:darkgreen;display:inline-block;border-radius:5px;background-color:#FFD9E6;font-family:Nexa;overflow:hidden"><p style="padding:15px;color:darkgreen;overflow:hidden;font-size:80%;letter-spacing:0.5px;margin:0"><b> </b>Two multi-classification tasks with the following target classes:</p></div>


# <div style="color:darkgreen;display:inline-block;border-radius:5px;background-color:#FFD9E6;font-family:Nexa;overflow:hidden"><p style="padding:15px;color:darkgreen;overflow:hidden;font-size:90%;letter-spacing:0.5px;margin:0"><b>1. </b> Brand Sector</p></div>

### 🔘  Fashion
### 🔘  Cosmetics



In [None]:
# Text styling shared by the titles and axis labels of both figures in this cell
title_style = {'fontsize': 14, 'fontweight': 'bold', 'color': 'darkgreen'}
label_style = {'fontsize': 12, 'fontweight': 'bold', 'color': 'darkblue'}

# Figure 1: number of rows per brand sector
plt.figure(figsize=(10, 6))
sns.countplot(data=data, x="BrandSector", palette="Set3")
plt.title("Brand Sector Classification", **title_style)
plt.xlabel("Brand Sector", **label_style)
plt.ylabel("Count", **label_style)
plt.show()

# The same counts as text, followed by a blank line
brand_sector_counts = data["BrandSector"].value_counts()
print("Brand Sector Value Counts:", brand_sector_counts, "", sep="\n")

# Figure 2: filled density curves of "Equity2021", one per sector.
# The figure is written to disk before plt.show() hands it to the notebook.
plt.figure(figsize=(10, 6))
sns.kdeplot(data=data, x="Equity2021", hue="BrandSector", palette="Set3", fill=True)
plt.title("Brand Equity Density Distribution by Brand Sector", **title_style)
plt.xlabel("Brand Equity (2021)", **label_style)
plt.ylabel("Density", **label_style)
plt.savefig('Brand Equity Density Distribution by Brand Sector.png')
plt.show()

# <div style="color:darkgreen;display:inline-block;border-radius:5px;background-color:#FFD9E6;font-family:Nexa;overflow:hidden"><p style="padding:15px;color:darkgreen;overflow:hidden;font-size:90%;letter-spacing:0.5px;margin:0"><b>2. </b>Brand Subsector</p></div>

 

### 🔘 Luxury
### 🔘 Cosmetics


In [None]:
# Text styling shared by the titles and axis labels of both figures in this cell
title_style = {'fontsize': 14, 'fontweight': 'bold', 'color': 'darkgreen'}
label_style = {'fontsize': 12, 'fontweight': 'bold', 'color': 'darkblue'}

# Figure 1: number of rows per brand subsector
plt.figure(figsize=(10, 6))
sns.countplot(data=data, x="BrandSubSector", palette="Set2")
plt.title("Brand Subsector Classification", **title_style)
plt.xlabel("Brand Subsector", **label_style)
plt.ylabel("Count", **label_style)
plt.show()

# The same counts as text, followed by a blank line
brand_subsector_counts = data["BrandSubSector"].value_counts()
print("Brand Subsector Value Counts:", brand_subsector_counts, "", sep="\n")

# Figure 2: filled density curves of "Equity2021", one per subsector.
# The figure is written to disk before plt.show() hands it to the notebook.
plt.figure(figsize=(10, 6))
sns.kdeplot(data=data, x="Equity2021", hue="BrandSubSector", palette="Set2", fill=True)
plt.title("Brand Equity Density Distribution by Brand Subsector", **title_style)
plt.xlabel("Brand Equity (2021)", **label_style)
plt.ylabel("Density", **label_style)
plt.savefig('Brand Equity Density Distribution by Brand Subsector.png')
plt.show()

In [None]:
# Countries ordered from most to fewest rows, so the bars descend left to right
country_order = data["BrandOriginCountry"].value_counts().index
label_style = {'fontsize': 12, 'fontweight': 'bold', 'color': 'darkblue'}

plt.figure(figsize=(12, 6))
sns.countplot(data=data, x="BrandOriginCountry", order=country_order, palette="Set3")
plt.title("Distribution of Brands by Origin Country", fontsize=14, fontweight='bold', color='darkgreen')
plt.xlabel("Origin Country", **label_style)
plt.ylabel("Count", **label_style)
plt.xticks(rotation=90)  # vertical tick labels
plt.savefig('Distribution of Brands by Origin Country.png')
plt.show()

# <div style="color:darkgreen;display:inline-block;border-radius:5px;background-color:#FFD9E6;font-family:Nexa;overflow:hidden"><p style="padding:15px;color:darkgreen;overflow:hidden;font-size:80%;letter-spacing:0.5px;margin:0"><b> </b>Brand Sector/Subsector Trends Over Time</p></div>



#### 🔘  The distribution of brands in different sectors and subsectors has evolved over the years (from 2001 to 2021).
#### 🔘 Visualize these trends to see whether there are any shifts or patterns.

In [None]:
# Rank column for the year under analysis (e.g. "Rank2001")
year_to_analyze = "Rank2001"
year_label = year_to_analyze[-4:]  # last four characters of the column name, e.g. "2001"

# Number of brands at each rank value in the chosen year, split by sector
plt.figure(figsize=(12, 6))
sns.countplot(x=year_to_analyze, hue="BrandSector", data=data, palette="Set3")
plt.title(f"Brand Sector Trends Over Time ({year_label})", fontsize = 14, fontweight = 'bold', color = 'darkgreen')
plt.xlabel(f"Rank in {year_label}", fontsize = 12, fontweight = 'bold', color = 'darkblue')
plt.ylabel("Count", fontsize = 12, fontweight = 'bold', color = 'darkblue')
plt.legend(title="Brand Sector", loc="upper right")
plt.savefig('Brand Sector Trends Over Time.png')

# The printout is labelled with the year, so count only brands that have a rank
# in that year; counting every row would not match the plot above.
ranked_this_year = data[year_to_analyze].notna()
sector_counts = data.loc[ranked_this_year, "BrandSector"].value_counts()
print(f"Brand Sector Value Counts (Year {year_label}):")
print(sector_counts)
plt.show()

# <div style="color:darkgreen;display:inline-block;border-radius:5px;background-color:#FFD9E6;font-family:Nexa;overflow:hidden"><p style="padding:15px;color:darkgreen;overflow:hidden;font-size:85%;letter-spacing:0.5px;margin:0"><b> </b>Brand Equity Analysis</p></div>

#### 🔘 Compare the brand equity of brands within the same sector or subsector.
#### 🔘 Calculate summary statistics (mean, median, etc.) of brand equity for each sector or subsector.
#### 🔘 Visualize brand equity distributions using box plots or violin plots.

In [None]:
# Average "Equity2021" for every (sector, subsector) pair, flattened back to columns
grouping_keys = ["BrandSector", "BrandSubSector"]
equity_mean = data.groupby(grouping_keys)["Equity2021"].mean().reset_index()

print("Mean Brand Equity by Sector/Subsector (2021):", equity_mean, sep="\n")

# Grouped bars: one cluster per sector, one bar per subsector
label_style = {'fontsize': 12, 'fontweight': 'bold', 'color': 'darkblue'}
plt.figure(figsize=(12, 6))
sns.barplot(data=equity_mean, x="BrandSector", y="Equity2021", hue="BrandSubSector", palette="Set2")
plt.title("Brand Equity by Sector/Subsector", fontsize=14, fontweight='bold', color='darkgreen')
plt.xlabel("Brand Sector", **label_style)
plt.ylabel("Mean Brand Equity", **label_style)
plt.show()


# <div style="color:darkgreen;display:inline-block;border-radius:5px;background-color:#FFD9E6;font-family:Nexa;overflow:hidden"><p style="padding:15px;color:darkgreen;overflow:hidden;font-size:85%;letter-spacing:0.5px;margin:0"><b> </b>Brand Ranking Analysis</p></div>

#### 🔘 Analyze how the ranking of brands has changed over time within each sector or subsector.
#### 🔘 Identify the top-performing brands within each category.
#### 🔘 Visualize brand ranking trends.

In [None]:
# Rank column used for the x-axis (e.g. "Rank2001"); the y-axis is always "Rank2021"
year_to_analyze = "Rank2001"
label_style = {'fontsize': 12, 'fontweight': 'bold', 'color': 'darkblue'}

# One line per subsector, drawn in the order the subsectors first appear in the data
plt.figure(figsize=(12, 6))
for subsector_name in data["BrandSubSector"].unique():
    in_subsector = data["BrandSubSector"] == subsector_name
    sns.lineplot(data=data[in_subsector], x=year_to_analyze, y="Rank2021", label=subsector_name)

plt.title(f"Brand Ranking Trends Over Time ({year_to_analyze[-4:]})", fontsize=14, fontweight='bold', color='darkgreen')
plt.xlabel(f"Rank in {year_to_analyze[-4:]}", **label_style)
plt.ylabel("Rank", **label_style)
plt.legend(title="Brand Subsector")
plt.savefig('Brand Ranking Trends Over Time.png')
plt.show()

# <div style="color:darkgreen;display:inline-block;border-radius:5px;background-color:#FFD9E6;font-family:Nexa;overflow:hidden"><p style="padding:15px;color:darkgreen;overflow:hidden;font-size:85%;letter-spacing:0.5px;margin:0"><b> </b>Growth Rate Analysis</p></div>

#### 🔘 Examine the growth rates of brands within different sectors or subsectors.
#### 🔘 Identify brands with the highest and lowest growth rates.
#### 🔘 Visualize growth rate distributions.

In [None]:
# Box plots of "GrowthRate2021": one group per sector, one box per subsector
label_style = {'fontsize': 12, 'fontweight': 'bold', 'color': 'darkblue'}
plt.figure(figsize=(12, 6))
sns.boxplot(data=data, x="BrandSector", y="GrowthRate2021", hue="BrandSubSector", palette="Set2")
plt.title("Growth Rate Analysis", **{**label_style, 'color': 'darkgreen'})
plt.xlabel("Brand Sector", **label_style)
plt.ylabel("Growth Rate", **label_style)
plt.savefig('Growth Rate Analysis.png')
plt.show()

# describe() gives count, mean, std, min, quartiles and max for each (sector, subsector) pair
grouping_keys = ["BrandSector", "BrandSubSector"]
summary_stats = data.groupby(grouping_keys)["GrowthRate2021"].describe()
print("Summary Statistics for Growth Rate by Sector/Subsector (2021):", summary_stats, sep="\n")

# <div style="color:darkgreen;display:inline-block;border-radius:5px;background-color:#FFD9E6;font-family:Nexa;overflow:hidden"><p style="padding:15px;color:darkgreen;overflow:hidden;font-size:85%;letter-spacing:0.5px;margin:0"><b> </b>Geographical Analysis</p></div>


#### 🔘 Explore the geographical distribution of brands based on their origin country or region.
#### 🔘 Visualize which countries or regions have the highest number of brands.

In [None]:
# Countries ordered from most to fewest rows, so the bars descend left to right
country_order = data["BrandOriginCountry"].value_counts().index
label_style = {'fontsize': 12, 'fontweight': 'bold', 'color': 'darkblue'}

plt.figure(figsize=(12, 6))
sns.countplot(data=data, x="BrandOriginCountry", order=country_order, palette="Set3")
plt.title("Distribution of Brands by Origin Country", fontsize=14, fontweight='bold', color='darkgreen')
plt.xlabel("Origin Country", **label_style)
plt.ylabel("Count", **label_style)
plt.xticks(rotation=90)  # vertical tick labels
plt.savefig('Distribution of Brands by Origin Country.png')
plt.show()

# The counts behind the bars, as text
brand_count_by_country = data["BrandOriginCountry"].value_counts()
print("Count of Brands by Origin Country:", brand_count_by_country, sep="\n")

# <div style="color:darkgreen;display:inline-block;border-radius:5px;background-color:#FFD9E6;font-family:Nexa;overflow:hidden"><p style="padding:15px;color:darkgreen;overflow:hidden;font-size:85%;letter-spacing:0.5px;margin:0"><b> </b>Correlation Analysis</p></div>


#### 🔘 Investigate potential correlations between brand equity, growth rate, and brand ranking within sectors or subsectors.
#### 🔘 Calculate correlation coefficients and visualize correlations using heatmaps.

In [None]:
# Pairwise correlations between the three 2021 metrics
metric_columns = ["Equity2021", "Rank2021", "GrowthRate2021"]
correlation_matrix = data[metric_columns].corr()

# Annotated heatmap; each cell shows the coefficient to two decimals
plt.figure(figsize=(8, 6))
sns.heatmap(correlation_matrix, cmap="coolwarm", annot=True, fmt=".2f")
plt.title("Correlation Analysis", fontsize=14, fontweight='bold', color='darkgreen')
plt.savefig('Correlation Analysis.png')
plt.show()

print("Correlation Matrix:", correlation_matrix, sep="\n")


# <div style="color:darkgreen;display:inline-block;border-radius:5px;background-color:#FFD9E6;font-family:Nexa;overflow:hidden"><p style="padding:15px;color:darkgreen;overflow:hidden;font-size:85%;letter-spacing:0.5px;margin:0"><b> </b>Outlier Detection</p></div>


#### 🔘 Identify and investigate outliers in brand equity, growth rate, or ranking data.
#### 🔘 Determine if outliers are genuine or require further data validation.

In [None]:
def _iqr_outliers(frame, column):
    """Return the rows of *frame* whose *column* value lies beyond 1.5 IQR of the quartiles."""
    values = frame[column]
    q1 = values.quantile(0.25)
    q3 = values.quantile(0.75)
    spread = q3 - q1
    below = values < q1 - 1.5 * spread
    above = values > q3 + 1.5 * spread
    # NaN compares False on both sides, so rows with a missing value are never flagged
    return frame[below | above]


# Box plot of "Equity2021" per sector; points beyond the whiskers are the visual outliers
label_style = {'fontsize': 12, 'fontweight': 'bold', 'color': 'darkblue'}
plt.figure(figsize=(8, 6))
sns.boxplot(data=data, x="BrandSector", y="Equity2021", palette="Set3")
plt.title("Outlier Detection - Brand Equity", fontsize=14, fontweight='bold', color='darkgreen')
plt.xlabel("Brand Sector", **label_style)
plt.ylabel("Brand Equity", **label_style)
plt.savefig('Outlier Detection - Brand Equity.png')
plt.show()

# Apply the quartile rule within each sector, in order of first appearance
brand_sector_outliers = {
    sector_name: _iqr_outliers(data[data["BrandSector"] == sector_name], "Equity2021")
    for sector_name in data["BrandSector"].unique()
}

print("Outliers by Brand Sector:")
for sector_name, flagged in brand_sector_outliers.items():
    print(f"Brand Sector: {sector_name}", flagged[["BrandName", "Equity2021"]], "", sep="\n")


# <div style="color:darkgreen;display:inline-block;border-radius:5px;background-color:#FFD9E6;font-family:Nexa;overflow:hidden"><p style="padding:15px;color:darkgreen;overflow:hidden;font-size:80%;letter-spacing:0.5px;margin:0"><b> </b>Time Series Analysis</p></div>


#### 🔘 Time series analysis to detect trends, seasonality, and anomalies.

In [None]:
# Forward-fill gaps in "Equity2021" and assign the result back to the column.
# Calling fillna(..., inplace=True) on a column selection is chained assignment,
# which does not update `data` under pandas copy-on-write, and the `method=`
# argument of fillna is deprecated; ffill() + assignment works on all versions.
data["Equity2021"] = data["Equity2021"].ffill()

# Perform seasonal decomposition
from statsmodels.tsa.seasonal import seasonal_decompose

# Forward fill cannot fill NaNs at the very start of the column, and
# seasonal_decompose rejects missing values, so drop any that remain.
equity_series = data["Equity2021"].dropna()

# NOTE(review): each row appears to be a brand rather than a point in time, and
# period=1 leaves no room for a seasonal component - confirm this is the intended analysis.
result = seasonal_decompose(equity_series, model="additive", period=1)
result.plot()
plt.show()


# <div style="color:darkgreen;display:inline-block;border-radius:5px;background-color:#FFD9E6;font-family:Nexa;overflow:hidden"><p style="padding:15px;color:darkgreen;overflow:hidden;font-size:85%;letter-spacing:0.5px;margin:0"><b> </b>Machine Learning Predictions</p></div>



#### 🔘 Utilize machine learning algorithms for predictive modeling, such as predicting future brand rankings or growth rates based on historical data.

In [None]:
# Use machine learning algorithms (e.g., regression, classification) for predictions

from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error

In [None]:
# Number of NaN entries in each column of the DataFrame
missing_data = data.isna().sum()

print("Missing Data Analysis:", missing_data, sep="\n")

In [None]:
# Replacing NaN with zeros (fillna returns a new frame; `data` itself is unchanged)
data_filled = data.fillna(0)

# Verify the fill on the frame that was actually filled; checking `data` here
# would just re-display the original NaN counts.
data_filled.isna().sum()

In [None]:
# Drop rows that are missing any value the model needs. Dropping on *every*
# column would also discard rows for a gap in any unrelated column, which can
# leave no rows at all for the regression below.
data.dropna(subset=["Rank2021", "GrowthRate2021", "Equity2021"], inplace=True)

# Remaining NaN counts per column (the three model columns should now be zero)
data.isna().sum()

In [None]:
# Use machine learning algorithms (e.g., regression, classification) for predictions

from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error

In [None]:
# Rows lacking any of the regression's inputs or its target are removed in place
model_columns = ["Rank2021", "GrowthRate2021", "Equity2021"]
data.dropna(subset=model_columns, inplace=True)

In [None]:
# Fit a linear regression of "Equity2021" on "Rank2021" and "GrowthRate2021"
# and report its error on rows the model has not seen.
min_rows_for_holdout = 5  # below this, a 20% test split would be a single row or fail

# Check if there are any data samples left
if data.shape[0] > 0:
    # Split the data into features (X) and target (y)
    X = data[["Rank2021", "GrowthRate2021"]]
    y = data["Equity2021"]

    if len(X) >= min_rows_for_holdout:
        # Hold out 20% of the rows so the metrics measure generalisation rather
        # than memorisation; the fixed seed makes the split reproducible.
        X_train, X_test, y_train, y_test = train_test_split(
            X, y, test_size=0.2, random_state=42
        )
    else:
        # Too little data to split: fall back to in-sample evaluation, and say so
        print("Too few rows for a hold-out split; the metrics below are in-sample.")
        X_train, X_test, y_train, y_test = X, X, y, y

    # Create and fit the Linear Regression model on the training rows only
    model = LinearRegression()
    model.fit(X_train, y_train)

    # Predict and evaluate on the evaluation rows
    y_pred = model.predict(X_test)
    mse = mean_squared_error(y_test, y_pred)
    rmse = np.sqrt(mse)

    # Print the evaluation metrics
    print("Mean Squared Error (MSE):", mse)
    print("Root Mean Squared Error (RMSE):", rmse)
else:
    print("No data samples left after handling missing values.")

##### If the cell above prints "No data samples left after handling missing values.", no valid rows remained in the dataset after the missing-value handling: every row had a missing value in at least one of the columns that were checked, so all rows were removed during preprocessing and nothing was left for training and evaluation. It's essential to investigate and handle missing values appropriately to ensure meaningful model training and evaluation.

<div class="alert alert-block alert-info"> 📌 "Hey there! Your positive feedback and support for my notebook mean the world to me! It motivates me to create more valuable content. If you can spare a moment to give it an upvote, it would help others discover and benefit from it too. Together, let's foster a vibrant community of knowledge-sharing and empowerment. Thank you for considering it, and continued success on your learning journey!"😊</div>