<a href="https://www.kaggle.com/code/abhijitbhandari/kalimati-data-analysis?scriptVersionId=144404507" target="_blank"><img align="left" alt="Kaggle" title="Open in Kaggle" src="https://kaggle.com/static/images/open-in-kaggle.svg"></a>

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

In [None]:
# Load the Kalimati Tarkari CSV from the Kaggle input directory into a DataFrame.
data = pd.read_csv("/kaggle/input/kalimati-tarkari-dataset/Kalimati_Tarkari_Dataset.csv")
# Preview the first rows (a notebook cell displays its last expression).
data.head()

In [None]:
# Bar chart of the number of null values found in each column.
data.isnull().sum().plot(kind = "bar")

In [None]:
# Summary statistics of the dataset as reported by DataFrame.describe().
data.describe()

## Data Wrangling

In [None]:
# Inspect the dtype of every column before "Date" is converted to a datetime.
data.dtypes

In [None]:
# Parse the "Date" column into pandas datetimes so the .dt accessor can be used on it.
data["Date"] = pd.to_datetime(data["Date"])
data.head()

In [None]:
# Derive calendar features from the parsed "Date" column.
# month_name() / day_name() return English names by default, whereas
# strftime("%B") / strftime("%A") follow the locale of the running process.
# The month -> season lookup used later in this notebook is keyed by English
# month names, so the names must not depend on the locale.
data["Month"] = data["Date"].dt.month_name()
data["Year"] = data["Date"].dt.year
data["Day"] = data["Date"].dt.day_name()

In [None]:
# Preview the rows again, now including the Month, Year and Day columns.
data.head()

In [None]:
# Number of null values per column, including the newly derived columns.
data.isnull().sum()

In [None]:
# Report how many distinct values the "Commodity" column holds.
commodity_names = data["Commodity"].unique()
print("Total Number of Unique Commodities Sold at Kalimati: ", len(commodity_names))

In [None]:
# Distinct commodity count obtained directly from Series.nunique().
data["Commodity"].nunique()

In [None]:
# Lower-case every commodity name so that differently-cased spellings compare equal.
data["Commodity"] = data["Commodity"].str.lower()

In [None]:
# Preview the rows after the commodity names were lower-cased.
data.head()

### Data Available For Different Years

In [None]:
# Number of rows recorded for each year.
data_magnitude = data.groupby(["Year"])["Year"].count()

fig, ax = plt.subplots(figsize = (6,5))
data_magnitude.plot(kind = "bar", ax = ax)

# Write each bar's row count just above the bar.
for bar_position, row_count in enumerate(data_magnitude):
    ax.text(bar_position, row_count + 0.2, str(row_count), ha = "center", va = "bottom")

ax.set_title("Data Distribution For Various Years", color = "Red")
ax.set_xlabel("Year", color = "Green", fontsize = 12)
ax.set_ylabel("Count", color = "Blue", fontsize = 12)
fig.tight_layout()

fig.savefig("./1.Data_Distribution.png")
plt.show()
    

**We can assume that a reliable data recording mechanism was established after 2014. The highest number of entries was recorded in the year 2021. However, for the year 2022, it appears that we do not have complete data, and further verification is required.**

In [None]:
# Row counts per month, restricted to the rows recorded in 2022.
months_2022 = data.loc[data["Year"] == 2022, "Month"]
month_counts_2022 = months_2022.value_counts()

plt.figure(figsize = (5,3))
month_counts_2022.plot(kind = "bar")
plt.title("Data Available For 2022", color = "Red")
plt.xlabel("Month")
plt.ylabel("Data Count")
plt.tight_layout()

plt.savefig("./2.Data_Distribution_For_2022.png")
plt.show()
    

**Observing the data, it is evident that we have records available only up to the month of April for the year 2022.**

### Changes in Commodities Sold Based on Year

In [None]:
# Number of distinct commodities recorded in each year.
commodities_per_year = data.groupby("Year")["Commodity"].nunique()

plt.figure(figsize = (7,4))
commodities_per_year.plot(kind = "bar", cmap = "twilight_shifted")
plt.xlabel("Year", color = "green")
plt.ylabel("Unique Commodities", color = "green")
plt.title("Unique Commodities Sold Over Years", color = "Red")
plt.xticks(rotation = 45)
plt.tight_layout()

plt.savefig("./3.unique_commodities.png")

**It's clear that there was a noticeable uptick in the variety of commodities sold after the year 2019. This suggests that Kalimati (or the relevant marketplace) began offering a more extensive range of fruits and vegetables. Furthermore, this expansion indicates that Nepal either began producing or importing new types of fruits and vegetables post-2019.**

#### Which commodities started being included after 2019?

In [None]:
# Rows whose year is strictly earlier than 2019.
recorded_before_2019 = data["Year"].lt(2019)
data_before_2019 = data[recorded_before_2019]
data_before_2019.head()

In [None]:
# Rows from 2019 onwards (2019 itself is included).
recorded_from_2019 = data["Year"].ge(2019)
data_after_2019 = data[recorded_from_2019]
data_after_2019.head()

In [None]:
# Distinct (lower-cased) commodity names seen in each of the two periods,
# collected into sets so they can be compared with set arithmetic.
commodities_before_2019 = set(data_before_2019["Commodity"].str.lower())
commodities_after_2019 = set(data_after_2019["Commodity"].str.lower())

In [None]:
# Commodities that appear in only one of the two periods.
unique_commodities_after_2019 = commodities_after_2019.difference(commodities_before_2019)
unique_commodities_before_2019 = commodities_before_2019.difference(commodities_after_2019)

In [None]:
# Display the commodities that were recorded before 2019 but not from 2019 onwards.
unique_commodities_before_2019

In [None]:
# Label-only pie chart (equal-sized wedges) naming the commodities that were
# recorded from 2019 onwards but never before.
# Sets have no defined order, so sort the names to get the same label layout
# on every run; key=str keeps the sort safe if a non-string value is present.
labels_after_2019 = sorted(unique_commodities_after_2019, key = str)

# plt.subplots() already creates the figure; an additional plt.figure() call
# before it would only open a second, empty figure.
fig, ax = plt.subplots(figsize = (5,5))
ax.pie([1]*len(labels_after_2019),
       labels = labels_after_2019,
       startangle = 200,
       labeldistance= 1.1,
       rotatelabels=True)
ax.axis("equal")
# The title is lifted well above the axes to clear the rotated labels.
ax.set_title("Unique Items Sold Starting 2019", y = 1.45, color = "Red")
plt.savefig("./4.unique_commodities_after2019.png", bbox_inches = "tight")
plt.show()


In [None]:
# Label-only pie chart (equal-sized wedges) naming the commodities that were
# recorded before 2019 but not from 2019 onwards.
# Sets have no defined order, so sort the names to get the same label layout
# on every run; key=str keeps the sort safe if a non-string value is present.
labels_before_2019 = sorted(unique_commodities_before_2019, key = str)

# plt.subplots() already creates the figure; an additional plt.figure() call
# before it would only open a second, empty figure.
fig, ax = plt.subplots(figsize = (5,5))
ax.pie([1]*len(labels_before_2019),
       labels = labels_before_2019,
       startangle = 200,
       labeldistance= 1.1,
       rotatelabels=True)
ax.axis("equal")
ax.set_title("Unique Items Sold Before 2019", y = 1., color = "Red")
plt.savefig("./5.unique_commodities_before2019.png", bbox_inches = "tight")
plt.show()


## Items With Highest Price Fluctuation - Yearwise

In [None]:
# Mean of the "Average" price for every (year, commodity) pair, reshaped so
# that commodities are rows and years are columns.
# Pairs without any record are filled with 0 instead of NaN.
yearly_mean_price = data.groupby(["Year", "Commodity"])["Average"].mean()
items_avg_years = yearly_mean_price.unstack(level = 0).fillna(value = 0)

#### Price Fluctuation For Commodities With Minimum of 5 Years of Data With At Least One Missing

In [None]:
def count_zeros(row):
    """Return the number of entries in ``row`` that are equal to zero."""
    return row.eq(0).sum()
# Per commodity (row), count how many year columns hold 0.
zero_count = items_avg_years.apply(count_zeros, axis = 1)

In [None]:
# Keep the commodities that have at least one and at most five zero-valued years.
has_one_to_five_gaps = zero_count.between(1, 5)
filtered_items_avg_zero_lesseq5 = items_avg_years[has_one_to_five_gaps]

In [None]:
# Coefficient of variation (standard deviation / mean, in percent) of each
# commodity's yearly prices.
# NOTE: years without data are stored as 0 in this table, so they take part in
# both the standard deviation and the mean.
row_std = filtered_items_avg_zero_lesseq5.std(axis = 1)
row_mean = filtered_items_avg_zero_lesseq5.mean(axis = 1)

# assign() returns a new DataFrame instead of writing a column into the
# boolean-filtered selection, which is the pattern pandas flags with
# SettingWithCopyWarning.
filtered_items_avg_zero_lesseq5 = filtered_items_avg_zero_lesseq5.assign(
    **{"Coefficient of Variation": (row_std / row_mean) * 100}
)

In [None]:
# Rank by coefficient of variation, largest first, and keep the first ten rows.
sorted_items = filtered_items_avg_zero_lesseq5.sort_values(
    "Coefficient of Variation", ascending = False
)
top_10_variations = sorted_items.iloc[:10]

In [None]:
# One line per selected commodity across the year columns.
# The last column is left out of the plotted values.
yearly_prices = top_10_variations.iloc[:, :-1]

plt.figure(figsize = (10,5))
for commodity, price_row in yearly_prices.iterrows():
    plt.plot(yearly_prices.columns, price_row, label = commodity)
plt.title("Top 10 Commodities with Highest Price Fluctuation (Min. 5 Years of Sales Atleast One Missing)", fontsize = 12)
plt.xlabel("Year", fontsize = 12)
plt.ylabel("Price", fontsize = 12)
plt.grid(True)
plt.legend()
plt.savefig("./6.highest_price_fluc_5yrs1miss.png", bbox_inches = "tight")
plt.show()





Here, we are visualizing price fluctuations for commodities with a minimum of 5 years of available data. Notably, mandarin exhibited the highest volatility among these commodities. This could be attributed to its relatively recent introduction to the market or unforeseen disruptions such as the COVID-19 pandemic, impacting the supply chain and price stability.

Mandarins, part of the Citrus genus, are believed to have originated in ancient China, from which they derive their name. [Source: Healthline](https://www.healthline.com/nutrition/mandarin-orange#what-they-are)

The next commodity, strawberry, lacks data from 2017 to 2019. The price surge in 2020 may indicate a substantial increase in demand paired with limited supply. Further commodities are detailed in the legend of the line plot above.

Notably, Clive Dry (probably Clove Dry) stands out: its price grew consistently each year from 2018 to 2020, dipped slightly in 2021, and spiked again in the limited data we have for 2022.

#### Price Fluctuation For Commodities With More Than 5 Years of Data Unavailable (Limited Data)

In [None]:
# Keep the commodities that have more than five zero-valued years.
has_many_gaps = zero_count.gt(5)
filtered_items_avg_zero_gr5 = items_avg_years[has_many_gaps]
filtered_items_avg_zero_gr5.head()

In [None]:
# Coefficient of variation (standard deviation / mean, in percent) of each
# commodity's yearly prices.
# NOTE: years without data are stored as 0 in this table, so they take part in
# both the standard deviation and the mean.
row_std = filtered_items_avg_zero_gr5.std(axis = 1)
row_mean = filtered_items_avg_zero_gr5.mean(axis = 1)

# assign() returns a new DataFrame instead of writing a column into the
# boolean-filtered selection, which is the pattern pandas flags with
# SettingWithCopyWarning.
filtered_items_avg_zero_gr5 = filtered_items_avg_zero_gr5.assign(
    **{"Coefficient of Variation": (row_std / row_mean) * 100}
)
filtered_items_avg_zero_gr5.head()

In [None]:
# Rank by coefficient of variation, largest first, and keep the first ten rows.
sorted_items = filtered_items_avg_zero_gr5.sort_values(
    "Coefficient of Variation", ascending = False
)
top_10_variations = sorted_items.iloc[:10]

In [None]:
# One line per selected commodity across the year columns.
# The last column is left out of the plotted values.
yearly_prices = top_10_variations.iloc[:, :-1]

plt.figure(figsize = (10,5))
for commodity, price_row in yearly_prices.iterrows():
    plt.plot(yearly_prices.columns, price_row, label = commodity)
plt.title("Top 10 Commodities with Highest Price Fluctuation (Limited Data)", fontsize = 12)
plt.xlabel("Year", fontsize = 12)
plt.ylabel("Price", fontsize = 12)
plt.grid(True)
plt.legend()
plt.savefig("./7.highest_price_fluc_5yrsmore.png", bbox_inches = "tight")
plt.show()




In this analysis, we've examined commodities with more than 5 years of missing or null data. Among these commodities, Sarifa (Custard Apple) displayed the most significant price fluctuations. Given the absence of data before 2020, it suggests that Sarifa is relatively new to the market, gradually gaining recognition, and likely to experience upward price trends. It's important to note that our data only extends until April 2022.

Mango (Calcutte) witnessed a noticeable price surge in 2022 compared to 2021. This increase may be attributed to unforeseen factors or seasonal variations, warranting further investigation through a detailed seasonal analysis.

Avocado is another relatively recent addition to the Kalimati Market, introduced in 2021. Its price experienced a substantial rise in 2022 compared to the previous year, reflecting the evolving dynamics of this emerging product.

Overall, the current trend suggests that prices for commodities in this category are expected to continue rising over the next couple of years, indicating that these products are relatively new entrants in the market.

#### Price Fluctuation of Commodities That Remained Always Available

In [None]:
# Keep the commodities that have no zero-valued year at all.
has_no_gaps = zero_count.eq(0)
filtered_items_avg_zero_no = items_avg_years[has_no_gaps]
filtered_items_avg_zero_no.head()

In [None]:
# Coefficient of variation (standard deviation / mean, in percent) of each
# commodity's yearly prices.
row_std = filtered_items_avg_zero_no.std(axis = 1)
row_mean = filtered_items_avg_zero_no.mean(axis = 1)

# assign() returns a new DataFrame instead of writing a column into the
# boolean-filtered selection, which is the pattern pandas flags with
# SettingWithCopyWarning.
filtered_items_avg_zero_no = filtered_items_avg_zero_no.assign(
    **{"Coefficient of Variation": (row_std / row_mean) * 100}
)

In [None]:
# Rank by coefficient of variation, largest first, and keep the first ten rows.
sorted_items = filtered_items_avg_zero_no.sort_values(
    "Coefficient of Variation", ascending = False
)
top_10_variations = sorted_items.iloc[:10]

In [None]:
# One line per selected commodity across the year columns.
# The last column is left out of the plotted values.
yearly_prices = top_10_variations.iloc[:, :-1]

plt.figure(figsize = (10,5))
for commodity, price_row in yearly_prices.iterrows():
    plt.plot(yearly_prices.columns, price_row, label = commodity)
plt.title("Top 10 Commodities With Highest Price Fluctuation Consistently Available in the Market", fontsize = 12)
plt.xlabel("Year", fontsize = 12)
plt.ylabel("Price", fontsize = 12)
plt.grid(True)
plt.legend()
plt.savefig("./8.highest_price_fluc_constavail.png", bbox_inches = "tight")
plt.show()






Within this category, we've examined commodities consistently available in the Kalimati Fruits and Vegetable Market. Notably, mint has displayed an annual price increase, followed by mombin and then parsley.

From the line plot, it's evident that asparagus has consistently maintained a high price, with a trend of sustained or increasing demand. Although the price dipped in 2022, this might be attributed to a lack of available data for that year.

Lime appeared to be gaining popularity until 2017, after which its appeal declined. This shift may be linked to increased production or other factors at play.

Celery demonstrated stable performance until 2020, but thereafter, a decline is observed, continuing into 2021 and 2022.

In summary, mint, asparagus, and parsley stand out as exemplary commodities in this category, each demonstrating unique price and demand trends.

## Items With Lowest Price Fluctuation - Yearwise

#### Price Fluctuation of Commodities With Minimum of 5 Years of Data But With At Least One Missing Data Point

In [None]:
# Rebuild the commodity-by-year table of mean "Average" prices.
# (year, commodity) pairs without any record are filled with 0.
items_avg_years = data.groupby(["Year", "Commodity"])["Average"].mean().unstack(level = 0).fillna(value = 0)

def count_zeros(row):
    """Return the number of entries in ``row`` that are equal to zero."""
    return (row==0).sum()

# Number of zero-valued year columns for each commodity.
zero_count = items_avg_years.apply(count_zeros, axis = 1)

# Commodities with at least one and at most five zero-valued years.
# .copy() makes the selection an independent DataFrame, so adding a column to
# it below does not raise SettingWithCopyWarning.
filtered_items_avg_zero_lesseq5 = items_avg_years[(zero_count<=5)&(zero_count>0)].copy()

# Coefficient of variation (standard deviation / mean, in percent) per commodity.
# NOTE: the zero-filled years take part in both the std and the mean.
filtered_items_avg_zero_lesseq5["Coefficient of Variation"] = (filtered_items_avg_zero_lesseq5.std(axis = 1)/filtered_items_avg_zero_lesseq5.mean(axis = 1))*100

# Smallest coefficient of variation first; keep the first ten rows.
sorted_items = filtered_items_avg_zero_lesseq5.sort_values(by = ["Coefficient of Variation"],
                               ascending = True)
less_10_variations = sorted_items.head(10)

plt.figure(figsize = (10,5))
for commodity in less_10_variations.index:
    # [:-1] leaves the trailing "Coefficient of Variation" column out of the line.
    plt.plot(less_10_variations.columns[:-1],
             less_10_variations.loc[commodity][:-1], label = commodity)
plt.xlabel("Year", fontsize = 12)
plt.ylabel("Price", fontsize = 12)
plt.title("Top 10 Commodities with Lowest Price Fluctuation (Min. 5 Years of Sales Atleast One Missing)", fontsize = 12)
plt.legend()
plt.grid(True)
plt.savefig("./9.lowest_price_fluc_5yrs1miss.png", bbox_inches = "tight")
plt.show()




In the analysis presented here, it is evident that for commodities exhibiting a minimum of one missing data point for a specific year but no more than five missing data points within a nine-year timeframe, the prices of Mango (Maldah), Sponge Gourd, and Lemon have exhibited a notable stability. Consequently, we can infer that these commodities are likely to be commonly produced and widely used within Nepalese households.


#### Price Fluctuation For Commodities With More Than 5 Years of Data Unavailable (Limited Data)

In [None]:
# Commodities with more than five zero-valued years.
# .copy() makes the selection an independent DataFrame, so adding a column to
# it below does not raise SettingWithCopyWarning.
filtered_items_avg_zero_gr5 = items_avg_years[zero_count>5].copy()

# Coefficient of variation (standard deviation / mean, in percent) per commodity.
# NOTE: the zero-filled years take part in both the std and the mean.
filtered_items_avg_zero_gr5["Coefficient of Variation"] = (filtered_items_avg_zero_gr5.std(axis = 1)/filtered_items_avg_zero_gr5.mean(axis = 1))*100

# Smallest coefficient of variation first; keep the first ten rows.
sorted_items = filtered_items_avg_zero_gr5.sort_values(by = ["Coefficient of Variation"],
                               ascending = True)
less_10_variations = sorted_items.head(10)

plt.figure(figsize = (10,5))
for commodity in less_10_variations.index:
    # [:-1] leaves the trailing "Coefficient of Variation" column out of the line.
    plt.plot(less_10_variations.columns[:-1],
             less_10_variations.loc[commodity][:-1], label = commodity)
plt.xlabel("Year", fontsize = 12)
plt.ylabel("Price", fontsize = 12)
plt.title("Top 10 Commodities with Lowest Price Fluctuation (Limited Data)", fontsize = 12)
plt.legend()
plt.grid(True)
plt.savefig("./10.lowest_price_fluc_5yrs_moremiss.png", bbox_inches = "tight")
plt.show()



Since the data is missing for more than 5 years, we can assume that these products are relatively new arrivals in the Kalimati Market. In this category, we observe that most of the items are fish. Once introduced and priced in the market, their prices have remained largely unchanged, indicating price stability. This suggests that the usual supply chain has not encountered significant disruptions. Additionally, it is reasonable to assume that people may be less inclined to purchase these products regularly, hence there is no need to increase prices and potentially restrict market demand.

#### Price Fluctuation of Commodities That Remained Always Available

In [None]:
# Commodities with no zero-valued year.
# .copy() makes the selection an independent DataFrame, so adding a column to
# it below does not raise SettingWithCopyWarning.
filtered_items_avg_zero_no = items_avg_years[zero_count==0].copy()

# Coefficient of variation (standard deviation / mean, in percent) per commodity.
filtered_items_avg_zero_no["Coefficient of Variation"] = (filtered_items_avg_zero_no.std(axis = 1)/filtered_items_avg_zero_no.mean(axis = 1))*100

# Smallest coefficient of variation first; keep the first ten rows.
sorted_items = filtered_items_avg_zero_no.sort_values(by = ["Coefficient of Variation"],
                               ascending = True)
less_10_variations = sorted_items.head(10)

plt.figure(figsize = (10,5))
for commodity in less_10_variations.index:
    # [:-1] leaves the trailing "Coefficient of Variation" column out of the line.
    plt.plot(less_10_variations.columns[:-1],
             less_10_variations.loc[commodity][:-1], label = commodity)
plt.xlabel("Year", fontsize = 12)
plt.ylabel("Price", fontsize = 12)
plt.title("Top 10 Commodities With Lowest Price Fluctuation Consistently Available in the Market", fontsize = 12)
plt.legend()
plt.grid(True)
plt.savefig("./11.lowest_price_fluc_constvail.png", bbox_inches = "tight")
plt.show()



This category encompasses commodities that have consistently been available in the Kalimati Market and enjoy widespread recognition among consumers. Prices within this category have exhibited relatively stable trends. However, for select premium items such as Gundruk, which experienced minimal price fluctuations until 2018, there was a sudden and significant price increase thereafter. On the other hand, Mushroom (Kanya) has consistently commanded a premium price compared to other food items and has maintained a near-constant price level over the years.

## Seasonal Analysis

In our calculations, we have opted not to utilize the mean as the primary measure but have instead chosen the median. This choice stems from our analysis of seasonal prices spanning a 9-year period. Our rationale for this preference is rooted in the understanding that employing the mean as the foundational metric for calculating prices during a specific season may lead to a disproportionate influence of outliers in certain scenarios.

In [None]:
# Preview the rows before the "Season" column is added.
data.head()

- Spring (March-May)
- Summer (June-August)
- Autumn (September-November)
- Winter (December-February)

In [None]:
# Month names grouped by season; inverted below into a month -> season lookup.
months_by_season = {
    "Winter": ("December", "January", "February"),
    "Spring": ("March", "April", "May"),
    "Summer": ("June", "July", "August"),
    "Autumn": ("September", "October", "November"),
}

month_to_season = {}
for season_name, month_names in months_by_season.items():
    for month_name in month_names:
        month_to_season[month_name] = season_name

# Tag every row with the season of its "Month" value.
data["Season"] = data["Month"].map(month_to_season)
data.head()

In [None]:
# Median of the "Average" price for every (season, commodity) pair, reshaped so
# that commodities are rows and seasons are columns.
# Pairs without any record are filled with 0 instead of NaN.
seasonal_median_price = data.groupby(["Season", "Commodity"])["Average"].median()
items_avg_season = seasonal_median_price.unstack(level = 0).fillna(value = 0)
items_avg_season.head(10)

### Items With Highest And Lowest Price Fluctuation Seasonal

#### Price Fluctuation For Commodities With Minimum of 5 Years of Data With At Least One Missing

In [None]:
def count_zeros(row):
    """Return the number of entries in ``row`` that are equal to zero."""
    is_zero = row == 0
    return is_zero.sum()
# Per commodity (row), count how many season columns hold 0.
zero_count = items_avg_season.apply(count_zeros, axis = 1)

In [None]:
# Keep the commodities that have at least one and at most five zero-valued seasons.
has_one_to_five_gaps = zero_count.between(1, 5)
filtered_items_avg_zero_lesseq5 = items_avg_season[has_one_to_five_gaps]

In [None]:
# Coefficient of variation (standard deviation / mean, in percent) of each
# commodity's seasonal median prices.
# NOTE: seasons without data are stored as 0 in this table, so they take part
# in both the standard deviation and the mean.
row_std = filtered_items_avg_zero_lesseq5.std(axis = 1)
row_mean = filtered_items_avg_zero_lesseq5.mean(axis = 1)

# assign() returns a new DataFrame instead of writing a column into the
# boolean-filtered selection, which is the pattern pandas flags with
# SettingWithCopyWarning.
filtered_items_avg_zero_lesseq5 = filtered_items_avg_zero_lesseq5.assign(
    **{"Coefficient of Variation": (row_std / row_mean) * 100}
)

# Largest coefficient of variation first; keep the first ten rows.
sorted_items = filtered_items_avg_zero_lesseq5.sort_values(by = ["Coefficient of Variation"],
                               ascending = False)
top_10_variations = sorted_items.head(10)

plt.figure(figsize = (10,5))
for commodity in top_10_variations.index:
    # [:-1] leaves the trailing "Coefficient of Variation" column out of the line.
    plt.plot(top_10_variations.columns[:-1],
             top_10_variations.loc[commodity][:-1], label = commodity)
# The columns of this table are seasons, so the x axis and the title refer to
# seasons rather than years.
plt.xlabel("Season", fontsize = 12)
plt.ylabel("Price", fontsize = 12)
plt.title("Top 10 Commodities with Highest Seasonal Price Fluctuation (Missing in At Least One Season)", fontsize = 12)
plt.legend()
plt.grid(True)

Here, it is evident that the price of Sarifa exhibits the most pronounced fluctuations, followed by maize. Furthermore, we can infer from the plot that Sarifa is predominantly available in the market during the autumn season, while maize is primarily accessible during the summer and winter seasons. The rise in maize prices during the winter season suggests that this is not a peak production period for maize.

When comparing the prices of various commodities, we observe that Sarifa commands the highest price during the autumn season, Mango (Dushari) peaks in spring, Mango (Chousa) is most expensive in summer, and during the winter season, the price of Bauhania flower reaches its zenith. This analysis is grounded in the concept of identifying commodities with the greatest price fluctuations.

This graphical representation also offers insights into the production seasons of different items. Commodity prices tend to rise when production is low and fall when productivity is high. Applying this principle, we can discern the seasons during which specific products are cultivated.

In [None]:
# Smallest coefficient of variation first; keep the first ten rows.
sorted_items = filtered_items_avg_zero_lesseq5.sort_values(by = ["Coefficient of Variation"],
                               ascending = True)
less_10_variations = sorted_items.head(10)

plt.figure(figsize = (10,5))
for commodity in less_10_variations.index:
    # [:-1] leaves the trailing "Coefficient of Variation" column out of the line.
    plt.plot(less_10_variations.columns[:-1],
             less_10_variations.loc[commodity][:-1], label = commodity)
# The columns of this table are seasons, so the x axis and the title refer to
# seasons rather than years.
plt.xlabel("Season", fontsize = 12)
plt.ylabel("Price", fontsize = 12)
plt.title("Top 10 Commodities with Lowest Seasonal Price Fluctuation (Missing in At Least One Season)", fontsize = 12)
plt.legend()
plt.grid(True)

Here, we observe that the prices of Amla, Yam, and Orange (Nepali) remain consistently stable throughout the seasons, unaffected by any fluctuations. However, during the autumn season, we witness the highest price for strawberries, followed by mandarins, and then avocados.

In spring, the pricing landscape shifts, with avocados commanding the highest prices, followed by strawberries and mandarins.

Interestingly, during the summer season, most items in this category are conspicuously absent from the market. The reason behind this scarcity can be attributed to the fact that, upon closer examination, we find that out of the 10 commodities listed, 4 are citrus fruits, and citrus fruits are primarily harvested in winter. This suggests that one contributing factor to their unavailability in the summer is the seasonal nature of citrus fruit production. However, this also underscores a potential issue within the country, as it appears that Nepal may lack a robust system for cold storage and managed supply chains, particularly during the off-seasons, which could help ensure a more consistent availability of these products.

#### Price Fluctuation For Commodities With More Than 5 Years of Data Unavailable (Limited Data)

In [None]:
# Commodities with more than five zero-valued seasons.
# NOTE(review): items_avg_season has one column per season, so a row can never
# contain more than five zeros and this selection is expected to be empty.
# The threshold was carried over from the yearly analysis and needs revisiting.
# .copy() makes the selection an independent DataFrame, so adding a column to
# it below does not raise SettingWithCopyWarning.
filtered_items_avg_zero_gr5 = items_avg_season[zero_count>5].copy()

# Coefficient of variation (standard deviation / mean, in percent) per commodity.
filtered_items_avg_zero_gr5["Coefficient of Variation"] = (filtered_items_avg_zero_gr5.std(axis = 1)/filtered_items_avg_zero_gr5.mean(axis = 1))*100

# Largest coefficient of variation first; keep the first ten rows.
sorted_items = filtered_items_avg_zero_gr5.sort_values(by = ["Coefficient of Variation"],
                               ascending = False)
top_10_variations = sorted_items.head(10)

if top_10_variations.empty:
    # Nothing to draw: skip the chart instead of showing blank axes and
    # calling plt.legend() without any labelled line.
    print("No commodity has more than 5 seasons without data; nothing to plot.")
else:
    plt.figure(figsize = (10,5))
    for commodity in top_10_variations.index:
        # [:-1] leaves the trailing "Coefficient of Variation" column out of the line.
        plt.plot(top_10_variations.columns[:-1],
                 top_10_variations.loc[commodity][:-1], label = commodity)
    # The columns of this table are seasons, not years.
    plt.xlabel("Season", fontsize = 12)
    plt.ylabel("Price", fontsize = 12)
    plt.title("Top 10 Commodities with Highest Price Fluctuation (Limited Data)", fontsize = 12)
    plt.legend()
    plt.grid(True)


#### Price Fluctuation of Commodities That Remained Always Available

In [None]:
# Commodities with no zero-valued season.
# .copy() makes the selection an independent DataFrame, so adding a column to
# it below does not raise SettingWithCopyWarning.
filtered_items_avg_zero_no = items_avg_season[zero_count==0].copy()

# Coefficient of variation (standard deviation / mean, in percent) per commodity.
filtered_items_avg_zero_no["Coefficient of Variation"] = (filtered_items_avg_zero_no.std(axis = 1)/filtered_items_avg_zero_no.mean(axis = 1))*100

# Largest coefficient of variation first; keep the first ten rows.
sorted_items = filtered_items_avg_zero_no.sort_values(by = ["Coefficient of Variation"],
                               ascending = False)
top_10_variations = sorted_items.head(10)

plt.figure(figsize = (10,5))
for commodity in top_10_variations.index:
    # [:-1] leaves the trailing "Coefficient of Variation" column out of the line.
    plt.plot(top_10_variations.columns[:-1],
             top_10_variations.loc[commodity][:-1], label = commodity)
# The columns of this table are seasons, not years.
plt.xlabel("Season", fontsize = 12)
plt.ylabel("Price", fontsize = 12)
plt.title("Top 10 Commodities With Highest Price Fluctuation Consistently Available in the Market", fontsize = 12)
plt.legend()
plt.grid(True)



In this analysis, we have examined commodities that consistently maintained their presence in the market. Within this category, three items - asparagus, carrot (terai), and mombin - exhibited the most significant price fluctuations.

Asparagus stands out with a substantial price spike during the winter months, while its prices reach their lowest point in the summer, implying that summer is likely the peak season for its production.

Following closely in terms of pricing is kiwi, which follows a linear pattern of price increase during the summer and a decline during other seasons, primarily autumn and winter. This trend suggests that kiwi production is more abundant during the autumn and winter seasons.

It's noteworthy that commodities within this category, which consistently maintained market availability, generally experienced lower price fluctuations compared to other products.

In [None]:

# Smallest coefficient of variation first; keep the first ten rows.
sorted_items = filtered_items_avg_zero_no.sort_values(by = ["Coefficient of Variation"],
                               ascending = True)
less_10_variations = sorted_items.head(10)

plt.figure(figsize = (10,5))
for commodity in less_10_variations.index:
    # [:-1] leaves the trailing "Coefficient of Variation" column out of the line.
    plt.plot(less_10_variations.columns[:-1],
             less_10_variations.loc[commodity][:-1], label = commodity)
# The columns of this table are seasons, not years.
plt.xlabel("Season", fontsize = 12)
plt.ylabel("Price", fontsize = 12)
plt.title("Top 10 Commodities With Lowest Price Fluctuation Consistently Available in the Market", fontsize = 12)
plt.legend()
plt.grid(True)

Within this category, we have scrutinized commodities that exhibit the lowest price fluctuation and consistently maintain their availability in the market. Remarkably, none of the products in this category have experienced significant price spikes in any season. Therefore, it can be reasonably inferred that these products are consistently accessible to consumers throughout the year.

Taking a closer look at these commodities, tofu emerges as the most price-stable option, showing virtually no price variation over time. Among the other commodities in this category, which predominantly consist of various fish products, the price of fresh fish (Chhadi) experiences slightly more fluctuation compared to the others. The fish variety with the most stable pricing, considering seasonal variations over the past nine years, is Fish Fresh (bachuwa). Additionally, the price of Fuji apples remains relatively stagnant, closely followed by Gundruk.

Local French Beans have not exhibited any significant price changes over time, mirroring a similar trend observed for pineapples.

## Analyzing the Most Expensive and Least Expensive Product Over the Years and Seasonwise

In [None]:
# Highest "Average" price for every (year, commodity) pair, reshaped so that
# years are rows and commodities are columns; missing pairs are filled with 0.
yearly_peak_price = data.groupby(["Year", "Commodity"])["Average"].max()
max_price = yearly_peak_price.unstack(level = 1).fillna(value = 0)
max_price.head()

In [None]:
def make_autopct(values):
    """Build an ``autopct`` callback that labels a wedge as ``"<value> (<pct>%)"``.

    ``values`` are the numbers being charted. The returned callable receives a
    wedge's percentage, converts it back to the rounded absolute value using
    the sum of ``values``, and returns both in one string.
    """
    def format_wedge(pct):
        absolute = int(round(pct * sum(values) / 100.0))
        return f"{absolute:d} ({pct:.2f}%)"
    return format_wedge

In [None]:
# One donut chart per year showing the five commodities with the highest price.
colors = ['gold', 'lightcoral', 'lightskyblue', 'lightgreen', 'plum']
for year, prices_in_year in max_price.iterrows():
    top5 = prices_in_year.nlargest(5)

    fig, ax = plt.subplots(figsize = (6,5))
    ax.pie(x = top5.values, labels = top5.index, autopct = make_autopct(top5.values), colors = colors)
    # A white disc over the centre turns the pie into a donut.
    ax.add_artist(plt.Circle((0,0), radius = 0.70, fc = "white"))
    ax.set_title(f'Top 5 Most Expensive Commodities in {year}')
    ax.axis("equal")
    fig.tight_layout()
    plt.show()
    
    

In [None]:
# One donut chart per year showing the five commodities with the lowest non-zero price.
colors = ['gold', 'lightcoral', 'lightskyblue', 'lightgreen', 'plum']
for year, prices_in_year in max_price.iterrows():
    # Zeros are dropped before ranking.
    recorded_prices = prices_in_year[prices_in_year > 0]
    top5 = recorded_prices.nsmallest(5)

    fig, ax = plt.subplots(figsize = (6,5))
    ax.pie(x = top5.values, labels = top5.index, autopct = make_autopct(top5.values), colors = colors)
    # A white disc over the centre turns the pie into a donut.
    ax.add_artist(plt.Circle((0,0), radius = 0.70, fc = "white"))
    ax.set_title(f'Top 5 Least Expensive Commodities in {year}')
    ax.axis("equal")
    fig.tight_layout()
    plt.show()
    

### Seasonwise Most Expensive

In [None]:
# Highest "Average" price for every (season, commodity) pair, reshaped so that
# seasons are rows and commodities are columns; missing pairs are filled with 0.
seasonal_peak_price = data.groupby(["Season", "Commodity"])["Average"].max()
max_price = seasonal_peak_price.unstack(level = 1).fillna(value = 0)
max_price.head()

In [None]:
# One donut chart per season showing the five commodities with the highest price.
colors = ['gold', 'lightcoral', 'lightskyblue', 'lightgreen', 'plum']
for season, prices_in_season in max_price.iterrows():
    top5 = prices_in_season.nlargest(5)

    fig, ax = plt.subplots(figsize = (6,5))
    ax.pie(x = top5.values, labels = top5.index, autopct = make_autopct(top5.values), colors = colors)
    # A white disc over the centre turns the pie into a donut.
    ax.add_artist(plt.Circle((0,0), radius = 0.70, fc = "white"))
    ax.set_title(f'Top 5 Most Expensive Commodities in {season}')
    ax.axis("equal")
    fig.tight_layout()
    plt.show()

In [None]:
# One donut chart per season showing the five commodities with the lowest non-zero price.
colors = ['gold', 'lightcoral', 'lightskyblue', 'lightgreen', 'plum']
for season, prices_in_season in max_price.iterrows():
    # Zeros are dropped before ranking.
    recorded_prices = prices_in_season[prices_in_season > 0]
    top5 = recorded_prices.nsmallest(5)

    fig, ax = plt.subplots(figsize = (6,5))
    ax.pie(x = top5.values, labels = top5.index, autopct = make_autopct(top5.values), colors = colors)
    # A white disc over the centre turns the pie into a donut.
    ax.add_artist(plt.Circle((0,0), radius = 0.70, fc = "white"))
    ax.set_title(f'Top 5 Least Expensive Commodities in {season}')
    ax.axis("equal")
    fig.tight_layout()
    plt.show()