# Cognitive Computing Assignment 7
**Name:** Aditya

<a href="https://colab.research.google.com/github/aditya-username/cognitive-computing/blob/main/cc_ass7_aditya.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## Q1: Data Preparation and Summary

In [1]:
# Import libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

In [2]:
# Pin the global NumPy RNG so the generated sales figures are reproducible
SEED = 20240505
np.random.seed(SEED)

In [3]:
# Build a months x categories table of random sales figures.
# Row/column labels are declared first so the array shape follows from them.
months = [
    "Jan", "Feb", "Mar", "Apr", "May", "Jun",
    "Jul", "Aug", "Sep", "Oct", "Nov", "Dec",
]
categories = ["Electronics", "Clothing", "Home & Kitchen", "Sports"]

# Integers drawn from [1200, 5200): the upper bound is exclusive
sales = np.random.randint(low=1200, high=5200, size=(len(months), len(categories)))
df = pd.DataFrame(data=sales, index=months, columns=categories)
df

### Part-II: Display first few rows and statistics

In [4]:
# Show a preview of the data followed by its descriptive statistics,
# each preceded by a plain-text caption.
summary_views = [
    ("First 5 months of sales data:", df.head()),
    ("\nStatistical summary:", df.describe()),
]
for caption, view in summary_views:
    print(caption)
    display(view)

## Q2: Data Visualization

In [5]:
# Line plot of monthly sales, one line per product category.
# Iterate over `categories` rather than `df.columns` so that re-running this
# cell after a derived column (e.g. 'Total') has been added to `df` still
# plots only the product categories.
fig, ax = plt.subplots(figsize=(10, 6))
for col in categories:
    ax.plot(df.index, df[col], marker='o', label=col)
ax.set_title("Monthly Sales by Category")
ax.set_xlabel("Month")
ax.set_ylabel("Sales")
ax.legend()
ax.grid(True, linestyle='--', alpha=0.5)
fig.tight_layout()
plt.show()

In [6]:
# Bar plot of yearly total sales per category.
# Totals are computed from `categories` only, so a derived column such as
# 'Total' never shows up as an extra bar if this cell is re-run later.
category_totals = (
    df[categories]
    .sum()
    .rename_axis("Category")
    .reset_index(name="Total Sales")
)

fig, ax = plt.subplots(figsize=(8, 5))
# seaborn >= 0.13 deprecates `palette` without `hue`; mapping hue to the same
# variable as x and disabling the legend gives the same per-bar colouring.
sns.barplot(
    data=category_totals,
    x="Category",
    y="Total Sales",
    hue="Category",
    palette='viridis',
    legend=False,
    ax=ax,
)
ax.set_title("Total Sales by Category (Year)")
ax.set_ylabel("Total Sales")
ax.set_xlabel("Category")
fig.tight_layout()
plt.show()

## Q3: Data Manipulation

In [7]:
# Add a new column: total sales per month.
# Sum only the category columns so the cell is idempotent: summing the whole
# frame would fold an existing 'Total' back into itself on a re-run and
# double the values.
df['Total'] = df[categories].sum(axis=1)
df

In [8]:
# Locate the month whose combined sales across categories is the largest
monthly_totals = df['Total']
max_month = monthly_totals.idxmax()
print(f"Month with highest total sales: {max_month} ({df.loc[max_month, 'Total']})")

In [9]:
# Mean monthly sales of every product category (derived columns excluded)
category_avgs = df.loc[:, categories].mean(axis=0)
print("Average sales per category:", category_avgs, sep="\n")

## Q4: Advanced Analysis

In [10]:
# Heatmap of the pairwise correlations between the category sales columns
category_corr = df[categories].corr()
fig, ax = plt.subplots(figsize=(6, 5))
sns.heatmap(category_corr, annot=True, fmt='.2f', cmap='coolwarm', ax=ax)
ax.set_title("Correlation between Categories")
fig.tight_layout()
plt.show()

In [11]:
# The category with the smallest standard deviation has the steadiest sales
stds = df.loc[:, categories].std()
most_consistent = stds.idxmin()
print(f"Most consistent sales: {most_consistent} (std: {stds[most_consistent]:.2f})")

## Q5: Exporting Data

In [12]:
# Save DataFrame to CSV.
# The file name is defined once so the export and the confirmation message
# cannot drift apart, and the month index gets an explicit "Month" header
# instead of an empty first header cell.
output_csv = "monthly_sales_aditya.csv"
df.to_csv(output_csv, index_label="Month")
print(f"Data exported to '{output_csv}'")