<a href="https://www.kaggle.com/code/abhijitbhandari/kalimati-data-analysis?scriptVersionId=144943225" target="_blank"><img align="left" alt="Kaggle" title="Open in Kaggle" src="https://kaggle.com/static/images/open-in-kaggle.svg"></a>

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from dateutil import parser
import re

In [None]:
# Load the Kalimati Tarkari dataset from the Kaggle input directory and preview the first rows.
data = pd.read_csv("/kaggle/input/kalimati-tarkari-dataset/Kalimati_Tarkari_Dataset.csv")
data.head()

In [None]:
# The price columns mix plain numbers with strings carrying a leading "Rs"
# marker; strip the marker so each column converts to a numeric dtype.
# `\s*` (instead of `\s+`) also removes a marker written without a following
# space ("Rs35"), which previously passed the startswith check unstripped and
# made pd.to_numeric raise.
pattern = r"^Rs\s*"


def _strip_rs_prefix(value):
    """Return *value* without a leading "Rs" marker; other values pass through unchanged."""
    text = str(value)
    if text.startswith("Rs"):
        return re.sub(pattern, "", text)
    return value


# One loop instead of three copies of the same expression.
for price_column in ("Minimum", "Maximum", "Average"):
    data[price_column] = pd.to_numeric(data[price_column].apply(_strip_rs_prefix))

In [None]:
# Bar chart of the number of missing values in each column.
data.isnull().sum().plot(kind = "bar")

In [None]:
# Summary statistics of the dataset.
data.describe()

## Data Wrangling

In [None]:
# Inspect the dtype of every column before wrangling.
data.dtypes

In [None]:
def parse_date(date_str):
    """Normalize a free-form date value to an ISO ``YYYY-MM-DD`` string.

    Returns None when the value cannot be parsed, so that the subsequent
    ``pd.to_datetime`` call turns it into NaT instead of failing.
    """
    try:
        return parser.parse(date_str).strftime("%Y-%m-%d")
    # Only the parser's documented failure modes are swallowed (bad format,
    # non-string input such as NaN, out-of-range values); anything else is a
    # genuine bug and should surface.
    except (ValueError, TypeError, OverflowError):
        return None


# Standardize every date, then convert the column to datetime64.
data["Date"] = pd.to_datetime(data["Date"].apply(parse_date))

In [None]:
# Derive calendar features from the parsed dates.
# month_name()/day_name() return English names by default, whereas
# strftime("%B")/strftime("%A") follow the process locale; English names keep
# "Month" usable with English-keyed lookups.
data["Month"] = data["Date"].dt.month_name()
data["Year"] = data["Date"].dt.year
# NOTE: "Day" holds the weekday name, not the day of the month.
data["Day"] = data["Date"].dt.day_name()

In [None]:
# Preview the frame with the new Month / Year / Day columns.
data.head()

In [None]:
# Count of missing values per column.
data.isnull().sum()

In [None]:
# Lower-case commodity names so that differently-cased spellings compare equal.
data["Commodity"] = data["Commodity"].str.lower()

In [None]:
# nunique() ignores missing values, so the printed total agrees with
# data["Commodity"].nunique(); len(unique()) would count NaN as an extra commodity.
print("Total Number of Unique Commodities Sold at Kalimati: ", data["Commodity"].nunique())

In [None]:
# Number of distinct (non-missing) commodity names.
data["Commodity"].nunique()

In [None]:
# Preview the frame after commodity names were lower-cased.
data.head()

### Data Available For Different Years

In [None]:
# Number of recorded rows per calendar year.
data_magnitude = data.groupby(["Year"])["Year"].count()

plt.figure(figsize=(6, 5))
ax = data_magnitude.plot(kind="bar")

# Write each bar's row count just above its top edge.
for bar_position, row_count in enumerate(data_magnitude):
    ax.text(bar_position, row_count + 0.2, str(row_count), ha="center", va="bottom")

plt.title("Data Distribution For Various Years", color="Red")
plt.xlabel("Year", color="Green", fontsize=12)
plt.ylabel("Count", color="Blue", fontsize=12)
plt.tight_layout()

# Save before show(): the figure is written while it is still the current one.
plt.savefig("./1.Data_Distribution.png")
plt.show()
    

**We can assume that a reliable data recording mechanism was established after 2014. The highest number of entries was recorded in the year 2022. However, 2023 is still in progress and may exceed the data count for 2022.**

In [None]:
# Row counts per month for the 2023 records, ordered by count.
rows_2023 = data[data["Year"] == 2023]
monthly_counts = rows_2023["Month"].value_counts()

plt.figure(figsize=(5, 3))
monthly_counts.plot(kind="bar")
plt.title("Data Available For 2023", color="Red")
plt.xlabel("Month")
plt.ylabel("Data Count")
plt.tight_layout()

plt.savefig("./2.Data_Distribution_For_2023.png")
plt.show()
    

### Changes in Commodities Sold Based on Year

In [None]:
# Number of distinct commodities recorded in each year.
commodities_per_year = data.groupby("Year")["Commodity"].nunique()

plt.figure(figsize=(7, 4))
commodities_per_year.plot(kind="bar", cmap="twilight_shifted")
plt.xlabel("Year", color="green")
plt.ylabel("Unique Commodities", color="green")
plt.title("Unique Commodities Sold Over Years", color="Red")
plt.xticks(rotation=45)
plt.tight_layout()

plt.savefig("./3.unique_commodities.png")

**It's clear that there was a noticeable uptick in the variety of commodities sold after the year 2019. This suggests that Kalimati (or the relevant marketplace) began offering a more extensive range of fruits and vegetables. Furthermore, this expansion indicates that Nepal either began producing or importing new types of fruits and vegetables post-2019.**

#### What are the commodities that started getting included after 2019??

In [None]:
# Rows recorded strictly before 2019.
is_before_2019 = data["Year"] < 2019
data_before_2019 = data[is_before_2019]
data_before_2019.head()

In [None]:
# Rows recorded in 2019 or later (despite the name, 2019 itself is included).
is_2019_or_later = data["Year"] >= 2019
data_after_2019 = data[is_2019_or_later]
data_after_2019.head()

In [None]:
# Distinct lower-cased commodity names seen in each period.
names_before_2019 = data_before_2019["Commodity"].str.lower()
names_after_2019 = data_after_2019["Commodity"].str.lower()
commodities_before_2019 = set(names_before_2019)
commodities_after_2019 = set(names_after_2019)

In [None]:
# Names that occur in exactly one of the two periods.
unique_commodities_after_2019 = commodities_after_2019.difference(commodities_before_2019)
unique_commodities_before_2019 = commodities_before_2019.difference(commodities_after_2019)

In [None]:
# Display the commodities that were recorded only before 2019.
unique_commodities_before_2019

In [None]:
# Equal-sized wedges are used purely as a radial layout for the names of the
# commodities that appear only from 2019 onwards.
# Sorting gives a reproducible wedge order (a set of strings has no stable
# iteration order across interpreter runs); key=str keeps the sort safe should
# a non-string value such as NaN be present.
new_commodity_labels = sorted(unique_commodities_after_2019, key=str)

# plt.subplots() creates the figure that is drawn and saved; the plt.figure()
# call that used to precede it only produced an extra, empty figure.
fig, ax = plt.subplots(figsize = (5,5))
ax.pie([1]*len(new_commodity_labels),
       labels = new_commodity_labels,
       startangle = 200,
       labeldistance= 1.1,
       rotatelabels=True)
ax.axis("equal")
# The title is lifted well above the axes so the rotated labels do not overlap it.
ax.set_title("Unique Items Sold Starting 2019", y = 1.45, color = "Red")
plt.savefig("./4.unique_commodities_after2019.png", bbox_inches = "tight")
plt.show()


In [None]:
# Equal-sized wedges are used purely as a radial layout for the names of the
# commodities that were recorded only before 2019.
# Sorting gives a reproducible wedge order (a set of strings has no stable
# iteration order across interpreter runs); key=str keeps the sort safe should
# a non-string value such as NaN be present.
old_commodity_labels = sorted(unique_commodities_before_2019, key=str)

# plt.subplots() creates the figure that is drawn and saved; the plt.figure()
# call that used to precede it only produced an extra, empty figure.
fig, ax = plt.subplots(figsize = (5,5))
ax.pie([1]*len(old_commodity_labels),
       labels = old_commodity_labels,
       startangle = 200,
       labeldistance= 1.1,
       rotatelabels=True)
ax.axis("equal")
ax.set_title("Unique Items Sold Before 2019", y = 1., color = "Red")
plt.savefig("./5.unique_commodities_before2019.png", bbox_inches = "tight")
plt.show()


#### Any Unique Commodity in 2023?

In [None]:
# Split the records into 2023 and everything earlier, then collect the
# distinct lower-cased commodity names of each part.
year_column = data["Year"]
data_2023 = data[year_column == 2023]
data_before_2023 = data[year_column < 2023]

names_2023 = data_2023["Commodity"].str.lower()
names_before_2023 = data_before_2023["Commodity"].str.lower()
commodities_2023 = set(names_2023)
commodities_before_2023 = set(names_before_2023)

In [None]:
# Commodities recorded in 2023 that never appeared in an earlier year.
unique_items_2023 = commodities_2023.difference(commodities_before_2023)
unique_items_2023

**Nothing New Has Been Added in 2023**

## Items With Highest Price Fluctuation - Yearwise

In [None]:
# Mean "Average" price per (year, commodity), reshaped so that rows are
# commodities and columns are years; combinations without data become 0.
mean_price_by_year = data.groupby(["Year", "Commodity"])["Average"].mean()
commodity_by_year = mean_price_by_year.unstack(level=0)
items_avg_years = commodity_by_year.fillna(value=0)

#### Price Fluctuation For Commodities With Minimum of 5 Years of Data With Atleast One Missing

In [None]:
def count_zeros(row):
    """Return how many entries of *row* are equal to zero."""
    is_zero = row == 0
    return is_zero.sum()
# Per commodity (row): number of year columns whose value is zero.
zero_count = items_avg_years.eq(0).sum(axis=1)

In [None]:
# Keep commodities with at most three zero-valued year columns. The former
# `zero_count<=5` term was implied by `zero_count<=3` and has been dropped.
# NOTE(review): this also admits commodities with no zero year at all, while the
# section heading asks for at least one missing year — confirm the intended range.
# .copy() makes the subset an independent frame, so adding a column to it
# afterwards does not raise SettingWithCopyWarning.
filtered_items_avg_zero_lesseq5 = items_avg_years[zero_count<=3].copy()

In [None]:
# Coefficient of variation per row: standard deviation over mean, in percent.
row_std = filtered_items_avg_zero_lesseq5.std(axis=1)
row_mean = filtered_items_avg_zero_lesseq5.mean(axis=1)
filtered_items_avg_zero_lesseq5["Coefficient of Variation"] = row_std / row_mean * 100

In [None]:
# Rank rows from the highest to the lowest coefficient of variation and keep
# the first ten.
sorted_items = filtered_items_avg_zero_lesseq5.sort_values(
    by="Coefficient of Variation", ascending=False
)
top_10_variations = sorted_items.iloc[:10]

In [None]:
# Preview the first rows of the ranked selection.
top_10_variations.head()

In [None]:
# z_order = top_10_variations.index

In [None]:
# plt.figure(figsize = (10,5))
# count = len(z_order)
# pos = 0
# for commodity in top_10_variations.index:
#     plt.fill_between([int(x) for x in top_10_variations.columns[:-1]],
#              top_10_variations.loc[commodity][:-1], label = commodity, alpha = 0.7, zorder = count - pos)
#     pos+=1
# plt.xlabel("Year", fontsize = 12)
# plt.ylabel("Price", fontsize = 12)
# plt.title("Top 10 Commodities with Highest Price Fluctuation (Min. 5 Years of Sales Atleast One Missing)", fontsize = 12)
# plt.legend(loc = "upper left")
# plt.grid(True)
# plt.savefig("./6.highest_price_fluc_5yrs1miss.png", bbox_inches = "tight")
# plt.show()





In [None]:
# Annotated heatmap of the selected rows; the last column (the coefficient of
# variation) is left out so only the per-year values are drawn.
yearly_values = top_10_variations.iloc[:, :-1]

plt.figure(figsize=(10, 7))
sns.heatmap(yearly_values, annot=True, fmt=".1f", cmap='Dark2_r', linewidths=1)
plt.title("Top 10 Commodities with Highest Price Fluctuation (Min. 5 Years of Sales Not More Than Three Missing)", fontsize=12)
plt.xlabel("Year")
plt.ylabel("Commodity with Highest Price Fluctuation (Descending)")
plt.tight_layout()

Category: **Zero_Count <= 3** (commodities with at most three years of missing data)

1. Prices in this category have been consistently rising since 2019.

2. Among these commodities, Mandarin stands out with the highest price fluctuation. Its price remained relatively stable until 2020 but saw a sharp increase after 2020, with the most significant rise occurring in 2021, around the end of the Covid-19 pandemic. Mandarins have historical roots in ancient China.

3. Sugarcane exhibits a similar level of price volatility, but it disappeared from the Kalimati Market between 2017 and 2019. This gap predates the COVID-19 pandemic, so it cannot be attributed to the pandemic; its cause needs further investigation.

4. Bauhinia Flower experienced a sudden price surge after 2020, while fresh fish disappeared from the market after that year.

5. Clove Green, Clove Dry, and Mint prices have been consistently decreasing. Mint, in particular, which had shown a steady price increase, experienced a sharp decline in 2023. The price of Mint is the highest in this category.

6. Guava is getting costlier each year.

7. Parsley saw a significant price hike in 2020, followed by a period of relative price stability.

In summary, most items in this category saw a notable increase in prices after 2020.

#### Price Fluctuation For Commodities With More Than 5 Years of Data Unavailability (Limited Data)

In [None]:
# Commodities with more than five zero-valued year columns.
# .copy() makes the subset an independent frame, so adding a column to it
# afterwards does not raise SettingWithCopyWarning.
filtered_items_avg_zero_gr5 = items_avg_years[zero_count>5].copy()
filtered_items_avg_zero_gr5.head()

In [None]:
# Coefficient of variation per row: standard deviation over mean, in percent.
row_std = filtered_items_avg_zero_gr5.std(axis=1)
row_mean = filtered_items_avg_zero_gr5.mean(axis=1)
filtered_items_avg_zero_gr5["Coefficient of Variation"] = row_std / row_mean * 100
filtered_items_avg_zero_gr5.head()

In [None]:
# Rank rows from the highest to the lowest coefficient of variation and keep
# the first ten.
sorted_items = filtered_items_avg_zero_gr5.sort_values(
    by="Coefficient of Variation", ascending=False
)
top_10_variations = sorted_items.iloc[:10]

In [None]:
# plt.figure(figsize = (10,5))
# for commodity in top_10_variations.index:
#     plt.plot(top_10_variations.columns[:-1],
#              top_10_variations.loc[commodity][:-1], label = commodity)
# plt.xlabel("Year", fontsize = 12)
# plt.ylabel("Price", fontsize = 12)
# plt.title("Top 10 Commodities with Highest Price Fluctuation (Limited Data)", fontsize = 12)
# plt.legend()
# plt.grid(True)
# plt.savefig("./7.highest_price_fluc_5yrsmore.png", bbox_inches = "tight")
# plt.show()




In [None]:
# Annotated heatmap of the selected rows; the last column (the coefficient of
# variation) is left out so only the per-year values are drawn.
yearly_values = top_10_variations.iloc[:, :-1]

plt.figure(figsize=(10, 7))
sns.heatmap(yearly_values, annot=True, fmt=".1f", cmap='Set1_r', linewidths=1)
plt.title("Top 10 Commodities with Highest Price Fluctuation (Limited Data)", fontsize=12)
plt.xlabel("Year")
plt.ylabel("Commodity with Highest Price Fluctuation (Descending)")
plt.tight_layout()

1. These products fall into the category of premium items due to their recent introduction to the market.
2. Prices for these products have consistently remained high, with the majority of them entering the market in 2020.
3. The timing of the introduction of premium products shortly after the Covid-19 pandemic raises questions about whether they were locally manufactured or imported.
4. Among these premium items, kiwi commands the highest price, while maize experiences significant price fluctuations in the Kalimati market.
5. Avocado is not coming down. Its price is constantly going up.
6. The fluctuation in maize prices can be attributed to its sporadic appearance and disappearance in the market after extended periods of absence.

#### Price Fluctuation of Commodities That Remained Always Available

In [None]:
# Commodities without a single zero-valued year column.
# .copy() makes the subset an independent frame, so adding a column to it
# afterwards does not raise SettingWithCopyWarning.
filtered_items_avg_zero_no = items_avg_years[zero_count==0].copy()
filtered_items_avg_zero_no.head()

In [None]:
# Coefficient of variation per row: standard deviation over mean, in percent.
row_std = filtered_items_avg_zero_no.std(axis=1)
row_mean = filtered_items_avg_zero_no.mean(axis=1)
filtered_items_avg_zero_no["Coefficient of Variation"] = row_std / row_mean * 100

In [None]:
# Rank rows from the highest to the lowest coefficient of variation and keep
# the first ten.
sorted_items = filtered_items_avg_zero_no.sort_values(
    by="Coefficient of Variation", ascending=False
)
top_10_variations = sorted_items.iloc[:10]

In [None]:
# plt.figure(figsize = (10,5))
# for commodity in top_10_variations.index:
#     plt.plot(top_10_variations.columns[:-1],
#              top_10_variations.loc[commodity][:-1], label = commodity)
# plt.xlabel("Year", fontsize = 12)
# plt.ylabel("Price", fontsize = 12)
# plt.title("Top 10 Commodities With Highest Price Fluctuation Consistently Available in the Market", fontsize = 12)
# plt.legend()
# plt.grid(True)
# plt.savefig("./8.highest_price_fluc_constavail.png", bbox_inches = "tight")
# plt.show()






In [None]:
# Annotated heatmap of the selected rows; the last column (the coefficient of
# variation) is left out so only the per-year values are drawn.
yearly_values = top_10_variations.iloc[:, :-1]

plt.figure(figsize=(10, 7))
sns.heatmap(yearly_values, annot=True, fmt=".1f", cmap='Set1_r', linewidths=1)
plt.title("Top 10 Commodities With Highest Price Fluctuation Consistently Available in the Market", fontsize=12)
plt.xlabel("Year")
plt.ylabel("Commodity with Highest Price Fluctuation (Descending)")
plt.tight_layout()

1. Prices of consistently available commodities have seen a significant increase.
2. Guava experienced a sharp price spike after 2020.
3. Both mint and parsley became nearly twice as expensive after 2020.
4. Lime prices have been decreasing since 2017, possibly due to local production growth or reduced consumer demand.
5. Ginger prices took a considerable leap in 2023, rising from 55.9 Rs per kilo to 183 Rs per kilo.
6. Despite their consistent availability, asparagus remains a premium product in the market.

## Items With Lowest Price Fluctuation - Yearwise

#### Price Fluctuation of Commodities With Minimum of 5 Years of Data But Atleast With One Missing Data

In [None]:
# Mean "Average" price per commodity (rows) and year (columns); combinations
# without data are filled with 0, which is what count_zeros() counts below.
items_avg_years = data.groupby(["Year", "Commodity"])["Average"].mean().unstack(level = 0).fillna(value = 0)

def count_zeros(row):
    """Return how many entries of *row* are equal to zero."""
    return (row==0).sum()
zero_count = items_avg_years.apply(count_zeros, axis = 1)

# Commodities with between one and five zero-valued years.
# .copy() makes the subset an independent frame, so adding the column below
# does not raise SettingWithCopyWarning.
filtered_items_avg_zero_lesseq5 = items_avg_years[(zero_count<=5)&(zero_count>0)].copy()

# Coefficient of variation per row: standard deviation over mean, in percent.
filtered_items_avg_zero_lesseq5["Coefficient of Variation"] = (filtered_items_avg_zero_lesseq5.std(axis = 1)/filtered_items_avg_zero_lesseq5.mean(axis = 1))*100

# Ascending sort: the ten rows with the smallest coefficient of variation.
sorted_items = filtered_items_avg_zero_lesseq5.sort_values(by = ["Coefficient of Variation"],
                               ascending = True)
less_10_variations = sorted_items.head(10)

plt.figure(figsize = (10,7))

# The last column (the coefficient of variation) is excluded from the heatmap.
sns.heatmap(less_10_variations.iloc[:,:-1], 
            linewidths = 1,
            cmap='Set1_r', 
            annot = True,
           fmt = ".1f")
plt.ylabel("Commodity with Lowest Price Fluctuation (Ascending)")
plt.xlabel("Year")
# The closing parenthesis was missing from the title.
plt.title("Top 10 Commodities with Lowest Price Fluctuation (Min. 5 Years of Sales With Atleast One Missing)", fontsize = 12)
plt.tight_layout()




1. These products can be assumed to be locally produced in the country because their prices show relatively low fluctuations compared to other analyzed products, and most of the products in this category have a price range between Rs 50-100.

2. In 2020, during the peak of the Covid-19 pandemic, the prices of Litchi, Lemon, and Pear (Local) experienced a sharp decline. This could be attributed to the lack of a market during that period, leading sellers to focus on clearing their stock rather than maximizing profit.

3. Clove Dry saw a significant price increase in 2020, almost doubling from previous years, and is now gradually decreasing. This suggests a potential relationship between the price of Clove Dry and the Covid-19 pandemic.

4. The price of Bauhinia Flower doubled after the Covid-19 pandemic, despite the Kalimati Market not selling any in 2020. This price increase warrants further investigation.

5. Kalimati Market appears to have stopped selling fresh fish from 2021 and began re-selling Sugarcane and Turnip after 2020. However, it's essential to exercise caution when drawing conclusions solely from zero figures, as there may be underlying factors at play.

#### Price Fluctuation For Commodities With More Than 5 Years of Data Unavailability (Limited Data)

In [None]:
# Commodities with more than five zero-valued year columns.
# .copy() makes the subset an independent frame, so adding the column below
# does not raise SettingWithCopyWarning.
filtered_items_avg_zero_gr5 = items_avg_years[zero_count>5].copy()

# Coefficient of variation per row: standard deviation over mean, in percent.
filtered_items_avg_zero_gr5["Coefficient of Variation"] = (filtered_items_avg_zero_gr5.std(axis = 1)/filtered_items_avg_zero_gr5.mean(axis = 1))*100

# Ascending sort: the ten rows with the smallest coefficient of variation.
sorted_items = filtered_items_avg_zero_gr5.sort_values(by = ["Coefficient of Variation"],
                               ascending = True)
less_10_variations = sorted_items.head(10)

plt.figure(figsize = (10,7))

# The last column (the coefficient of variation) is excluded from the heatmap.
sns.heatmap(less_10_variations.iloc[:,:-1], 
            linewidths = 1,
            cmap='Accent_r', 
            annot = True,
           fmt = ".1f")
plt.ylabel("Commodity with Lowest Price Fluctuation (Ascending)")
plt.xlabel("Year")
plt.title("Top 10 Commodities with Lowest Price Fluctuation (Limited Data)", fontsize = 12)
plt.tight_layout()



1. This category comprises products that are relatively new additions to the Kalimati Market, introduced in the year of the Covid-19 pandemic in 2019.

2. Upon closer examination, these products appear to be generally imported from foreign countries, indicating a shift in the market dynamics.

3. The Terai region of Nepal, known for its agricultural production, saw the late introduction of pointed gourd to the Kalimati Market, occurring only in 2019.

4. Among the fish varieties, the price of Fish Fresh (Mungari) exhibits notable inconsistency, with a significant price increase of nearly 50 Rs per kilo observed in 2023.

#### Price Fluctuation of Commodities That Remained Always Available

In [None]:
# Commodities without a single zero-valued year column.
# .copy() makes the subset an independent frame, so adding the column below
# does not raise SettingWithCopyWarning.
filtered_items_avg_zero_no = items_avg_years[zero_count==0].copy()

# Coefficient of variation per row: standard deviation over mean, in percent.
filtered_items_avg_zero_no["Coefficient of Variation"] = (filtered_items_avg_zero_no.std(axis = 1)/filtered_items_avg_zero_no.mean(axis = 1))*100

# Ascending sort: the ten rows with the smallest coefficient of variation.
sorted_items = filtered_items_avg_zero_no.sort_values(by = ["Coefficient of Variation"],
                               ascending = True)
less_10_variations = sorted_items.head(10)


plt.figure(figsize = (10,7))

# The last column (the coefficient of variation) is excluded from the heatmap.
sns.heatmap(less_10_variations.iloc[:,:-1], 
            linewidths = 1,
            cmap='Accent_r', 
            annot = True,
           fmt = ".1f")
plt.ylabel("Commodity with Lowest Price Fluctuation (Ascending)")
plt.xlabel("Year")
plt.title("Top 10 Commodities With Lowest Price Fluctuation Consistently Available in the Market", fontsize = 12)
plt.tight_layout()


1. This category encompasses commodities that have been consistently available in the market.

2. Notably, even among consistently available items, some products are changing their price classes. For example, Papaya (Nepali) transitioned to the next price class, which includes items in the range of 60 - 80.

3. In 2020, Mushroom (Kanye), which had maintained a price range of 120-140 for several years, made a significant leap into the price range of 160 - 180.

4. Snake Gourd's price has shown minimal change over time, suggesting it may not be a popular choice among Nepalese families or that its production remains consistent.

5. Tomato Small (Local) follows a similar pattern to Snake Gourd, with relatively stable prices over the years.

6. Okara, on the other hand, has shifted between three different price classes. In 2023, its price jumped to the category of 80 - 100, whereas it primarily ranged between 60 - 80 from 2016 to 2022.

## Seasonal Analysis

In our calculations, we have opted not to utilize the mean as the primary measure but have instead chosen the median. This choice stems from our analysis of seasonal prices spanning a 9-year period. Our rationale for this preference is rooted in the understanding that employing the mean as the foundational metric for calculating prices during a specific season may lead to a disproportionate influence of outliers in certain scenarios.

In [None]:
# Preview the frame before the Season column is added.
data.head()

- Spring (March-May)
- Summer (June-August)
- Autumn (September-November)
- Winter (December-February)

In [None]:
# Season -> months it covers; inverted below into a month -> season lookup.
_months_by_season = {
    'Winter': ('January', 'February', 'December'),
    'Spring': ('March', 'April', 'May'),
    'Summer': ('June', 'July', 'August'),
    'Autumn': ('September', 'October', 'November'),
}

month_to_season = {
    month_name: season_name
    for season_name, month_names in _months_by_season.items()
    for month_name in month_names
}

# Tag every row with its season by looking up the month name; months missing
# from the lookup yield NaN.
data["Season"] = data["Month"].map(month_to_season)
data.head()

In [None]:
# Median "Average" price per (season, commodity), reshaped so that rows are
# commodities and columns are seasons; combinations without data become 0.
median_price_by_season = data.groupby(["Season", "Commodity"])["Average"].median()
commodity_by_season = median_price_by_season.unstack(level=0)
items_avg_season = commodity_by_season.fillna(value=0)
items_avg_season.head(10)

### Items With Highest And Lowest Price Fluctuation Seasonal

#### Price Fluctuation For Commodities With Minimum of 5 Years of Data With Atleast One Missing

In [None]:
def count_zeros(row):
    """Return how many entries of *row* are equal to zero."""
    is_zero = row == 0
    return is_zero.sum()
# Per commodity (row): number of season columns whose value is zero.
zero_count = items_avg_season.eq(0).sum(axis=1)

In [None]:
# Commodities with at most three zero-valued season columns.
# .copy() makes the subset an independent frame, so adding a column to it
# afterwards does not raise SettingWithCopyWarning.
filtered_items_avg_zero_lesseq5 = items_avg_season[(zero_count<=3)].copy()

In [None]:
# Coefficient of variation per row: standard deviation over mean, in percent.
filtered_items_avg_zero_lesseq5["Coefficient of Variation"] = (filtered_items_avg_zero_lesseq5.std(axis = 1)/filtered_items_avg_zero_lesseq5.mean(axis = 1))*100
# Descending sort: the ten rows with the largest coefficient of variation.
sorted_items = filtered_items_avg_zero_lesseq5.sort_values(by = ["Coefficient of Variation"],
                               ascending = False)
top_10_variations = sorted_items.head(10)

plt.figure(figsize = (10,7))

# The last column (the coefficient of variation) is excluded from the heatmap.
sns.heatmap(top_10_variations.iloc[:,:-1], 
            linewidths = 1,
            cmap='Dark2_r', 
            annot = True,
           fmt = ".1f")
plt.ylabel("Commodity with Highest Price Fluctuation (Descending)")
plt.xlabel("Season")
# The closing parenthesis was missing from the title.
plt.title("Top 10 Commodities with Highest Price Fluctuation (Min. 5 Years of Sales Atleast One Missing)", fontsize = 12)
plt.tight_layout()

1. Maize exhibits the highest price fluctuations when analyzed seasonally, but the data appears perplexing. Maize's price is 6.5 during the summer and then sharply jumps to a range of 50 - 100 in winter. 

2. Assuming the data is accurate, it suggests that maize is harvested during the summer, and the surplus from this season is stored in cold storage facilities. Subsequently, it is brought to the market in winter at a premium price.

3. Most of the commodities listed in this category are considered summer goods, as their prices are relatively lower during the summer compared to other seasons. However, exceptions to this trend include Mango (Chousa) and Litchi (Indian).

In [None]:
# Ascending sort: the ten rows with the smallest coefficient of variation.
sorted_items = filtered_items_avg_zero_lesseq5.sort_values(by = ["Coefficient of Variation"],
                               ascending = True)
less_10_variations = sorted_items.head(10)


plt.figure(figsize = (10,7))

# The last column (the coefficient of variation) is excluded from the heatmap.
sns.heatmap(less_10_variations.iloc[:,:-1], 
            linewidths = 1,
            cmap='Dark2_r', 
            annot = True,
           fmt = ".1f")
# This chart ranks the *lowest* fluctuation; the axis label used to say
# "Highest", contradicting both the sort order and the title.
plt.ylabel("Commodity with Lowest Price Fluctuation (Ascending)")
plt.xlabel("Season")
plt.title("Top 10 Commodities with Lowest Price Fluctuation (Min. 5 Years of Sales Atleast One Missing)", fontsize = 12)
plt.tight_layout()



Summary of the price trends shown in the heatmap above:

1. **Apple (Fuji):**
   - Price drops in Spring and Winter.
   - Price goes up in Autumn and Summer.
   - The price drop is not drastic.

2. **Tofu, Pineapple, and Apple (Jholey):**
   - These are the cheapest items on the list.

3. **Fish Fresh (Rahu):**
   - Prices remain pretty consistent throughout the year.
   - A slight increase of 20 - 30 Rs in Spring and Summer.
   - It's cheaper than Fish Rahu.
   - The price drops around 10 - 12 Rs in Summer.

#### Price Fluctuation For Commodities With More Than 5 Years of Data Unavailability (Limited Data)

In [None]:
# NOTE(review): items_avg_season has one column per season, and the season
# lookup defines only four seasons, so a row can hold at most four zeros. If
# zero_count was computed from items_avg_season, `zero_count>5` never matches
# and this selection (and the plot below) is empty. The threshold looks copied
# from the yearly analysis — confirm the intended cut-off for seasonal data.
# .copy() makes the subset an independent frame, so adding the column below
# does not raise SettingWithCopyWarning.
filtered_items_avg_zero_gr5 = items_avg_season[zero_count>5].copy()

# Coefficient of variation per row: standard deviation over mean, in percent.
filtered_items_avg_zero_gr5["Coefficient of Variation"] = (filtered_items_avg_zero_gr5.std(axis = 1)/filtered_items_avg_zero_gr5.mean(axis = 1))*100

# Descending sort: the ten rows with the largest coefficient of variation.
sorted_items = filtered_items_avg_zero_gr5.sort_values(by = ["Coefficient of Variation"],
                               ascending = False)
top_10_variations = sorted_items.head(10)

plt.figure(figsize = (10,5))
# One line per commodity across the season columns; the trailing coefficient
# of variation column is skipped via [:-1].
for commodity in top_10_variations.index:
    plt.plot(top_10_variations.columns[:-1],
             top_10_variations.loc[commodity][:-1], label = commodity)
plt.xlabel("Season", fontsize = 12)
plt.ylabel("Price", fontsize = 12)
plt.title("Top 10 Commodities with Highest Price Fluctuation (Limited Data)", fontsize = 12)
plt.legend()
plt.grid(True)


#### Price Fluctuation of Commodities That Remained Always Available

In [None]:
# Commodities without a single zero-valued season column.
# .copy() makes the subset an independent frame, so adding the column below
# does not raise SettingWithCopyWarning.
filtered_items_avg_zero_no = items_avg_season[zero_count==0].copy()

# Coefficient of variation per row: standard deviation over mean, in percent.
filtered_items_avg_zero_no["Coefficient of Variation"] = (filtered_items_avg_zero_no.std(axis = 1)/filtered_items_avg_zero_no.mean(axis = 1))*100

# Descending sort: the ten rows with the largest coefficient of variation.
sorted_items = filtered_items_avg_zero_no.sort_values(by = ["Coefficient of Variation"],
                               ascending = False)
top_10_variations = sorted_items.head(10)


plt.figure(figsize = (10,7))

# The last column (the coefficient of variation) is excluded from the heatmap.
sns.heatmap(top_10_variations.iloc[:,:-1], 
            linewidths = 1,
            cmap='Dark2_r', 
            annot = True,
           fmt = ".1f")
# Rows are sorted in descending order; the label used to say "(Ascending)".
plt.ylabel("Commodity with Highest Price Fluctuation (Descending)")
plt.xlabel("Season")
plt.title("Top 10 Commodities With Highest Price Fluctuation Consistently Available in the Market", fontsize = 12)
plt.tight_layout()






1. **Mombin:**
   - Exhibits significant price volatility.
   - In Spring, its price nearly doubles.

2. **Cabbage:**
   - Appears to be a commonly produced and used item.
   - Maintains fairly consistent prices in all seasons.
   - Prices only show an increase in Autumn.

3. **Guava:**
   - Experiences a sharp price increase in Spring.
   - Becomes more affordable in Autumn.

4. **Seasonal Price Comparison:**
   - Generally, most commodities are cheaper in Spring.
   - Prices are slightly higher in Summer.
   - Winter is the most expensive season for these items.

In [None]:

# Ascending sort: the ten rows with the smallest coefficient of variation.
sorted_items = filtered_items_avg_zero_no.sort_values(by = ["Coefficient of Variation"],
                               ascending = True)
less_10_variations = sorted_items.head(10)



plt.figure(figsize = (10,7))

# The last column (the coefficient of variation) is excluded from the heatmap.
sns.heatmap(less_10_variations.iloc[:,:-1], 
            linewidths = 1,
            cmap='Dark2_r', 
            annot = True,
           fmt = ".1f")
# This chart ranks the *lowest* fluctuation; the axis label used to say
# "Highest", contradicting both the sort order and the title.
plt.ylabel("Commodity with Lowest Price Fluctuation (Ascending)")
plt.xlabel("Season")
plt.title("Top 10 Commodities With Lowest Price Fluctuation Consistently Available in the Market", fontsize = 12)
plt.tight_layout()



1. **Price Consistency:**
   - Products in this category exhibit fairly consistent prices.
   - Prices fluctuate within a narrow range of 10-14 Rs.
   
2. **Adequate Production:**
   - These consistent price trends suggest that production of these products is fairly adequate across all seasons.

## Analyzing the Most Expensive and Least Expensive Product Over the Years and Seasonwise

In [None]:
# Highest "Average" price per (year, commodity), reshaped so that rows are
# years and columns are commodities; combinations without data become 0.
peak_by_year = data.groupby(["Year", "Commodity"])["Average"].max()
year_by_commodity = peak_by_year.unstack(level=1)
max_price = year_by_commodity.fillna(value=0)
max_price.head()

In [None]:
def make_autopct(values):
    """Build a formatter that renders a percentage as ``"<value> (<pct>%)"``.

    The returned callable receives a percentage and reconstructs the matching
    absolute value from the sum of *values*, rounded to the nearest integer.
    """
    grand_total = sum(values)

    def my_autopct(pct):
        absolute = int(round(pct * grand_total / 100.0))
        return f"{absolute:d} ({pct:.2f}%)"

    return my_autopct

In [None]:
# One donut chart per year with the five largest values of that year's row.
colors = ['gold', 'lightcoral', 'lightskyblue', 'lightgreen', 'plum']
for year, prices_in_year in max_price.iterrows():
    top5 = prices_in_year.nlargest(5)

    plt.figure(figsize=(6, 5))
    plt.pie(x=top5.values, labels=top5.index, autopct=make_autopct(top5.values), colors=colors)
    # A white disc over the centre turns the pie into a donut.
    donut_hole = plt.Circle((0, 0), radius=0.70, fc="white")
    plt.gca().add_artist(donut_hole)
    plt.title(f'Top 5 Most Expensive Commodities in {year}')
    plt.axis("equal")
    plt.tight_layout()
    plt.show()
    
    

In [None]:
# One donut chart per year with the five smallest positive values of that
# year's row; zeros are filtered out first.
colors = ['gold', 'lightcoral', 'lightskyblue', 'lightgreen', 'plum']
for year, prices_in_year in max_price.iterrows():
    positive_prices = prices_in_year[prices_in_year > 0]
    top5 = positive_prices.nsmallest(5)

    plt.figure(figsize=(6, 5))
    plt.pie(x=top5.values, labels=top5.index, autopct=make_autopct(top5.values), colors=colors)
    # A white disc over the centre turns the pie into a donut.
    donut_hole = plt.Circle((0, 0), radius=0.70, fc="white")
    plt.gca().add_artist(donut_hole)
    plt.title(f'Top 5 Least Expensive Commodities in {year}')
    plt.axis("equal")
    plt.tight_layout()
    plt.show()
    

### Seasonwise Most Expensive

In [None]:
# Highest "Average" price per (season, commodity), reshaped so that rows are
# seasons and columns are commodities; combinations without data become 0.
peak_by_season = data.groupby(["Season", "Commodity"])["Average"].max()
season_by_commodity = peak_by_season.unstack(level=1)
max_price = season_by_commodity.fillna(value=0)
max_price.head()

In [None]:
# One donut chart per season with the five largest values of that season's row.
colors = ['gold', 'lightcoral', 'lightskyblue', 'lightgreen', 'plum']
for season, prices_in_season in max_price.iterrows():
    top5 = prices_in_season.nlargest(5)

    plt.figure(figsize=(6, 5))
    plt.pie(x=top5.values, labels=top5.index, autopct=make_autopct(top5.values), colors=colors)
    # A white disc over the centre turns the pie into a donut.
    donut_hole = plt.Circle((0, 0), radius=0.70, fc="white")
    plt.gca().add_artist(donut_hole)
    plt.title(f'Top 5 Most Expensive Commodities in {season}')
    plt.axis("equal")
    plt.tight_layout()
    plt.show()

In [None]:
# One donut chart per season with the five smallest positive values of that
# season's row; zeros are filtered out first.
colors = ['gold', 'lightcoral', 'lightskyblue', 'lightgreen', 'plum']
for season, prices_in_season in max_price.iterrows():
    positive_prices = prices_in_season[prices_in_season > 0]
    top5 = positive_prices.nsmallest(5)

    plt.figure(figsize=(6, 5))
    plt.pie(x=top5.values, labels=top5.index, autopct=make_autopct(top5.values), colors=colors)
    # A white disc over the centre turns the pie into a donut.
    donut_hole = plt.Circle((0, 0), radius=0.70, fc="white")
    plt.gca().add_artist(donut_hole)
    plt.title(f'Top 5 Least Expensive Commodities in {season}')
    plt.axis("equal")
    plt.tight_layout()
    plt.show()