In [None]:
import pandas as pd
import numpy as np

**1. Sales Data Analysis**

Use Case: Analyze sales data for trends, customer segmentation, or performance.

Operations:
Count total sales per product category.

Calculate the total revenue generated by each sales representative.

Find the product with the highest sales.

Group data by sales regions and calculate average sales.

In [None]:
# Sample sales records: one row per product, with its category, volume,
# revenue, the rep who sold it and the region it was sold in.
data = dict(
    product_category=['Electronics', 'Furniture', 'Electronics', 'Toys', 'Furniture'],
    product=['Laptop', 'Sofa', 'Phone', 'Toy Car', 'Table'],
    units_sold=[100, 50, 200, 80, 60],
    revenue=[1000, 1500, 2000, 1200, 1800],
    sales_rep=['Alice', 'Bob', 'Alice', 'Charlie', 'Bob'],
    region=['North', 'South', 'North', 'West', 'South'],
)

# Column-oriented dict -> DataFrame (each key becomes a column).
df = pd.DataFrame.from_dict(data)

# Plain-text dump of the whole (tiny) table.
print(df)


  product_category  product  units_sold  revenue sales_rep region
0      Electronics   Laptop         100     1000     Alice  North
1        Furniture     Sofa          50     1500       Bob  South
2      Electronics    Phone         200     2000     Alice  North
3             Toys  Toy Car          80     1200   Charlie   West
4        Furniture    Table          60     1800       Bob  South


In [None]:
# Total units sold for each product category, returned as a flat
# two-column table (category, units_sold) rather than an indexed Series.
total_sales_per_category = df.groupby('product_category', as_index=False)['units_sold'].sum()

print(total_sales_per_category)


  product_category  units_sold
0      Electronics         300
1        Furniture         110
2             Toys          80


In [None]:
# Sum revenue within each sales representative's rows, then turn the
# group labels back into an ordinary column.
total_revenue_per_rep = (
    df.groupby('sales_rep')['revenue']
    .agg('sum')
    .reset_index()
)

print(total_revenue_per_rep)

  sales_rep  revenue
0     Alice     3000
1       Bob     3300
2   Charlie     1200


In [None]:
# idxmax() yields the index label of the largest 'units_sold' value;
# .loc then pulls that whole row out as a Series.
best_selling_product = df.loc[df['units_sold'].idxmax(), :]

print(best_selling_product)

product_category    Electronics
product                   Phone
units_sold                  200
revenue                    2000
sales_rep                 Alice
region                    North
Name: 2, dtype: object


In [None]:
# Mean revenue of the rows in each region, as a flat (region, revenue) table.
avg_sales_per_region = df.groupby('region', as_index=False)['revenue'].mean()

print(avg_sales_per_region)

  region  revenue
0  North   1500.0
1  South   1650.0
2   West   1200.0


**2. Employee Data Analysis**

Use Case: Manage and analyze employee-related data like salaries, departments, and performance.

Operations:
Count the number of employees per department.

Find the employee with the highest salary.

Calculate average salary per department.

Sort employees based on their performance score or salary.

In [None]:
# Sample HR records: one row per employee.
data = {
    "employee_id": [1, 2, 3, 4, 5],
    "department": ["HR", "IT", "HR", "Finance", "IT"],
    "salary": [60000, 90000, 65000, 75000, 80000],
    "performance_score": [85, 92, 88, 76, 95],
}

df = pd.DataFrame(data)
print(df.head())

# Headcount: number of employee_id entries in each department.
employee_count_per_dept = df.groupby("department")["employee_id"].agg("count")
print(employee_count_per_dept)

# Row of the single best-paid employee (first one if salaries tie).
highest_salary_employee = df.loc[df["salary"].idxmax(), :]
print(highest_salary_employee)

# Mean salary inside each department.
avg_salary_per_dept = df.groupby("department")["salary"].agg("mean")
print(avg_salary_per_dept)

# Ranking by performance score, best first.
sorted_employees = df.sort_values("performance_score", ascending=False)
print(sorted_employees)

   employee_id department  salary  performance_score
0            1         HR   60000                 85
1            2         IT   90000                 92
2            3         HR   65000                 88
3            4    Finance   75000                 76
4            5         IT   80000                 95
department
Finance    1
HR         2
IT         2
Name: employee_id, dtype: int64
employee_id              2
department              IT
salary               90000
performance_score       92
Name: 1, dtype: object
department
Finance    75000.0
HR         62500.0
IT         85000.0
Name: salary, dtype: float64
   employee_id department  salary  performance_score
4            5         IT   80000                 95
1            2         IT   90000                 92
2            3         HR   65000                 88
0            1         HR   60000                 85
3            4    Finance   75000                 76


**3. Financial Data (Stock Market)**

Use Case: Perform financial data analysis for stocks, assets, and investments.

Operations:
Calculate daily, weekly, or monthly stock returns.

Find the stock with the highest or lowest closing price.

Compute moving averages for stock prices.

Group stock data by industry or sector and calculate key metrics like P/E ratio or market cap.

In [None]:
# Five consecutive daily observations.
# NOTE(review): every row carries a different stock_id, so 'close' is not one
# instrument's price history; the return calculations below simply treat the
# column as a single time series -- confirm this is the intended illustration.
data = {
    'date': pd.date_range(start='2023-09-01', periods=5, freq='D'),
    'close': [100, 102, 101, 103, 105],
    'stock_id': [1, 2, 3, 4, 5],
    'industry': ['Tech', 'Finance', 'Tech', 'Retail', 'Finance'],
    'price': [150, 200, 170, 80, 220],
    'earnings': [10, 20, 15, 8, 25],
    'market_cap': [1e9, 2e9, 1.5e9, 500e6, 2.2e9]
}

df = pd.DataFrame(data)

# Set the 'date' column as the index so the series can be resampled by
# calendar period.
df = df.set_index('date')

# Daily returns: change relative to the previous row; the first row has no
# predecessor and is therefore NaN.
daily_returns = df['close'].pct_change()

# Weekly returns: take the close at each week end (carrying the latest known
# value forward), then the change from one week end to the next.
weekly_returns = df['close'].resample('W').ffill().pct_change()

# Monthly returns: same idea at month ends. An offset object is passed instead
# of the string alias 'M' because that alias is deprecated in newer pandas
# releases (replaced by 'ME'), while 'ME' is unknown to older ones; MonthEnd()
# is accepted by both. The sample covers a single month, so the only value
# is NaN.
monthly_returns = df['close'].resample(pd.offsets.MonthEnd()).ffill().pct_change()
print(daily_returns)
print(weekly_returns)
print(monthly_returns)

date
2023-09-01         NaN
2023-09-02    0.020000
2023-09-03   -0.009804
2023-09-04    0.019802
2023-09-05    0.019417
Name: close, dtype: float64
date
2023-09-03         NaN
2023-09-10    0.039604
Freq: W-SUN, Name: close, dtype: float64
date
2023-09-30   NaN
Freq: M, Name: close, dtype: float64


In [None]:
# Preview the first five rows of the date-indexed frame.
df.head(n=5)

Unnamed: 0_level_0,close,stock_id,industry,price,earnings,market_cap
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2023-09-01,100,1,Tech,150,10,1000000000.0
2023-09-02,102,2,Finance,200,20,2000000000.0
2023-09-03,101,3,Tech,170,15,1500000000.0
2023-09-04,103,4,Retail,80,8,500000000.0
2023-09-05,105,5,Finance,220,25,2200000000.0


In [None]:
# Row holding the highest closing price (idxmax returns its index label).
highest_closing_stock = df.loc[df['close'].idxmax(), :]
print(highest_closing_stock)
# Row holding the lowest closing price.
lowest_closing_stock = df.loc[df['close'].idxmin(), :]
print(lowest_closing_stock)

close                  105
stock_id                 5
industry           Finance
price                  220
earnings                25
market_cap    2200000000.0
Name: 2023-09-05 00:00:00, dtype: object
close                  100
stock_id                 1
industry              Tech
price                  150
earnings                10
market_cap    1000000000.0
Name: 2023-09-01 00:00:00, dtype: object


In [None]:
# 3-row simple moving average of the close; the first two rows do not have a
# full window yet and come out as NaN.
df['moving_average'] = df['close'].rolling(3).mean()
print(df)

            close  stock_id industry  price  earnings    market_cap  \
date                                                                  
2023-09-01    100         1     Tech    150        10  1.000000e+09   
2023-09-02    102         2  Finance    200        20  2.000000e+09   
2023-09-03    101         3     Tech    170        15  1.500000e+09   
2023-09-04    103         4   Retail     80         8  5.000000e+08   
2023-09-05    105         5  Finance    220        25  2.200000e+09   

            moving_average  
date                        
2023-09-01             NaN  
2023-09-02             NaN  
2023-09-03           101.0  
2023-09-04           102.0  
2023-09-05           103.0  


In [None]:
# Per-industry summary: mean price, total earnings and total market cap.
industry_metrics = df.groupby('industry').agg(
    price=('price', 'mean'),
    earnings=('earnings', 'sum'),
    market_cap=('market_cap', 'sum'),
)
print(industry_metrics)

          price  earnings    market_cap
industry                               
Finance   210.0        45  4.200000e+09
Retail     80.0         8  5.000000e+08
Tech      160.0        25  2.500000e+09


**4. Customer Data Analysis**

Use Case: Analyze customer behavior or segmentation data.

Operations:
Group customers by location and calculate total purchases per city.

Find the customer who made the highest number of purchases.

Calculate the average amount spent per customer.

Sort customers by their total purchase value.

In [None]:
# Sample customer records: one row per customer with their city and the
# total value they purchased.
data = {
    "customer_id": [1, 2, 3, 4, 5],
    "city": ["New York", "Los Angeles", "New York", "Chicago", "Los Angeles"],
    "total_purchase": [250, 150, 300, 200, 180],
}

df = pd.DataFrame(data)
print(df.head())

# Purchase value summed over the customers of each city.
total_purchases_per_city = df.groupby("city")["total_purchase"].agg("sum")
print(total_purchases_per_city)

# Row of the customer with the largest total_purchase value.
customer_with_most_purchases = df.loc[df["total_purchase"].idxmax(), :]
print(customer_with_most_purchases)

# Customers ranked by total purchase value, largest first.
sorted_customers = df.sort_values("total_purchase", ascending=False)
print(sorted_customers)

# Mean purchase value across all customers.
avg_purchase_per_customer = df["total_purchase"].agg("mean")
print(avg_purchase_per_customer)

   customer_id         city  total_purchase
0            1     New York             250
1            2  Los Angeles             150
2            3     New York             300
3            4      Chicago             200
4            5  Los Angeles             180
city
Chicago        200
Los Angeles    330
New York       550
Name: total_purchase, dtype: int64
customer_id              3
city              New York
total_purchase         300
Name: 2, dtype: object
   customer_id         city  total_purchase
2            3     New York             300
0            1     New York             250
3            4      Chicago             200
4            5  Los Angeles             180
1            2  Los Angeles             150
216.0


**5. Healthcare Data**

Use Case: Analyze patient data or medical records to gain insights.

Operations:
Count the number of patients per hospital or department.

Calculate the average hospital stay per disease.

Identify the doctor with the most patients.

Sort patients based on age, condition severity, or treatment costs.

In [None]:
# Sample patient records: one row per patient.
data = {
    "patient_id": [1, 2, 3, 4, 5],
    "hospital": ["City Hospital", "County Hospital", "City Hospital", "General Hospital", "County Hospital"],
    "department": ["Cardiology", "Oncology", "Cardiology", "Neurology", "Oncology"],
    "disease": ["Heart Disease", "Cancer", "Heart Disease", "Stroke", "Cancer"],
    "hospital_stay_days": [5, 10, 7, 12, 9],
    "doctor": ["Dr. Smith", "Dr. Johnson", "Dr. Smith", "Dr. Brown", "Dr. Johnson"],
    "age": [45, 60, 50, 70, 65],
    "condition_severity": [3, 5, 4, 2, 4],
    "treatment_cost": [10000, 20000, 15000, 18000, 12000],
}

df = pd.DataFrame(data)

print(df.head())

# Number of patients in each department.
patient_count_per_dept = df.groupby("department")["patient_id"].agg("count")
print(patient_count_per_dept)

# Mean length of stay (days) for each disease.
avg_hospital_stay_per_disease = df.groupby("disease")["hospital_stay_days"].agg("mean")
print(avg_hospital_stay_per_disease)

# Doctor whose name occurs most often; when counts tie, whichever
# value_counts() lists first is returned.
doctor_frequencies = df["doctor"].value_counts()
doctor_with_most_patients = doctor_frequencies.idxmax()
print(doctor_with_most_patients)

# Patients ordered from oldest to youngest.
sorted_patients = df.sort_values("age", ascending=False)
print(sorted_patients)


   patient_id          hospital  department        disease  \
0           1     City Hospital  Cardiology  Heart Disease   
1           2   County Hospital    Oncology         Cancer   
2           3     City Hospital  Cardiology  Heart Disease   
3           4  General Hospital   Neurology         Stroke   
4           5   County Hospital    Oncology         Cancer   

   hospital_stay_days       doctor  age  condition_severity  treatment_cost  
0                   5    Dr. Smith   45                   3           10000  
1                  10  Dr. Johnson   60                   5           20000  
2                   7    Dr. Smith   50                   4           15000  
3                  12    Dr. Brown   70                   2           18000  
4                   9  Dr. Johnson   65                   4           12000  
department
Cardiology    2
Neurology     1
Oncology      2
Name: patient_id, dtype: int64
disease
Cancer            9.5
Heart Disease     6.0
Stroke           

**6. E-commerce Data Analysis**

Use Case: Analyze online sales, product views, and customer engagement data.

Operations:
Count total orders per product category.

Find the most viewed or most purchased product.

Calculate the average cart value of customers.

Sort products by total sales or views.

In [None]:
# Sample e-commerce records: one row per order.
data = dict(
    order_id=[1, 2, 3, 4, 5],
    product_category=['Electronics', 'Clothing', 'Electronics', 'Home', 'Clothing'],
    product=['Phone', 'Shirt', 'Laptop', 'Sofa', 'Jacket'],
    views=[150, 200, 300, 100, 250],
    purchases=[20, 35, 40, 10, 25],
    cart_value=[1000, 200, 1500, 500, 300],
)

df = pd.DataFrame(data)
print(df.head())

# Number of orders recorded for each product category.
order_count_per_category = df.groupby('product_category')['order_id'].agg('count')
print(order_count_per_category)

   order_id product_category product  views  purchases  cart_value
0         1      Electronics   Phone    150         20        1000
1         2         Clothing   Shirt    200         35         200
2         3      Electronics  Laptop    300         40        1500
3         4             Home    Sofa    100         10         500
4         5         Clothing  Jacket    250         25         300
product_category
Clothing       2
Electronics    2
Home           1
Name: order_id, dtype: int64


In [None]:
# Row with the largest 'views' value.
most_viewed_product = df.loc[df['views'].idxmax(), :]
print(most_viewed_product)

# Row with the largest 'purchases' value.
most_purchased_product = df.loc[df['purchases'].idxmax(), :]
print(most_purchased_product)

order_id                      3
product_category    Electronics
product                  Laptop
views                       300
purchases                    40
cart_value                 1500
Name: 2, dtype: object
order_id                      3
product_category    Electronics
product                  Laptop
views                       300
purchases                    40
cart_value                 1500
Name: 2, dtype: object


In [None]:
# Mean of the 'cart_value' column across all orders.
avg_cart_value = df['cart_value'].agg('mean')
print(avg_cart_value)

700.0


In [None]:
# Products ranked by purchases, highest first.
sorted_products = df.sort_values('purchases', ascending=False)
print(sorted_products)
# Products ranked by views, highest first.
sorted_products_views = df.sort_values('views', ascending=False)
print(sorted_products_views)

   order_id product_category product  views  purchases  cart_value
2         3      Electronics  Laptop    300         40        1500
1         2         Clothing   Shirt    200         35         200
4         5         Clothing  Jacket    250         25         300
0         1      Electronics   Phone    150         20        1000
3         4             Home    Sofa    100         10         500
   order_id product_category product  views  purchases  cart_value
2         3      Electronics  Laptop    300         40        1500
4         5         Clothing  Jacket    250         25         300
1         2         Clothing   Shirt    200         35         200
0         1      Electronics   Phone    150         20        1000
3         4             Home    Sofa    100         10         500


----------------------------------------------------------------------
**7. Sports Data Analytics**

Use Case: Analyze player or team performance data.

Operations:

Count the number of goals per team or player.

Calculate the average points scored per game.

Find the player with the highest score in a season.

Sort players or teams by win/loss ratios.

In [None]:
import pandas as pd

# Sample data: goals scored and games played by each player.
data = dict(
    Player=['A', 'B', 'C', 'D'],
    Goals=[10, 15, 7, 20],
    Games=[5, 7, 3, 10],
)

df = pd.DataFrame.from_dict(data)
df

Unnamed: 0,Player,Goals,Games
0,A,10,5
1,B,15,7
2,C,7,3
3,D,20,10


In [None]:
# Count the number of goals per player.
# Sum 'Goals' within each player and broadcast that total back onto every row
# belonging to the player. With one row per player (as in the sample data)
# this equals the 'Goals' column, but unlike a plain column copy it stays
# correct when a player appears in several rows.
df['Total Goals'] = df.groupby('Player')['Goals'].transform('sum')
print(df)

  Player  Goals  Games  Total Goals
0      A     10      5           10
1      B     15      7           15
2      C      7      3            7
3      D     20     10           20


In [None]:
# Goals per game for each player: element-wise Goals divided by Games.
df['Avg Goals'] = df['Goals'].div(df['Games'])
print(df)

  Player  Goals  Games  Total Goals  Avg Goals
0      A     10      5           10   2.000000
1      B     15      7           15   2.142857
2      C      7      3            7   2.333333
3      D     20     10           20   2.000000


In [None]:
# Row of the player with the largest 'Goals' value.
top_player = df.loc[df['Goals'].idxmax(), :]
print(top_player)

Player           D
Goals           20
Games           10
Total Goals     20
Avg Goals      2.0
Name: 3, dtype: object


In [None]:
# Attach illustrative win/loss counts (one value per existing row), derive
# the ratio, and rank players by it from best to worst.
df['Wins'] = [3, 5, 2, 6]
df['Losses'] = [2, 2, 1, 4]
df['Win/Loss Ratio'] = df['Wins'].div(df['Losses'])
df_sorted = df.sort_values('Win/Loss Ratio', ascending=False)
print(df_sorted)

  Player  Goals  Games  Total Goals  Avg Goals  Wins  Losses  Win/Loss Ratio
1      B     15      7           15   2.142857     5       2             2.5
2      C      7      3            7   2.333333     2       1             2.0
0      A     10      5           10   2.000000     3       2             1.5
3      D     20     10           20   2.000000     6       4             1.5


-------------------------------------------------------------------------------
**8. Survey Data Analysis**

Use Case: Analyze responses from customer or employee surveys.

Operations:

Count responses per survey category (e.g., satisfaction, experience).

Calculate the average rating for each question.

Find the question with the most positive or negative responses.

Sort responses by rating or demographic group.

In [None]:
# Sample survey responses: the category answered and the rating given.
data = dict(
    Category=['Satisfaction', 'Experience', 'Satisfaction', 'Experience'],
    Rating=[5, 4, 3, 5],
)

df = pd.DataFrame.from_dict(data)
df

Unnamed: 0,Category,Rating
0,Satisfaction,5
1,Experience,4
2,Satisfaction,3
3,Experience,5


In [None]:
# Number of response rows in each survey category.
category_count = df.groupby(by='Category').size()
print(category_count)

Category
Experience      2
Satisfaction    2
dtype: int64


In [None]:
# Mean rating of the responses in each category.
avg_rating = df.groupby('Category')['Rating'].agg('mean')
avg_rating

Unnamed: 0_level_0,Rating
Category,Unnamed: 1_level_1
Experience,4.5
Satisfaction,4.0


In [None]:
# Response row with the highest rating (the first one if several tie).
most_positive = df.loc[df['Rating'].idxmax(), :]
most_positive

Unnamed: 0,0
Category,Satisfaction
Rating,5


In [None]:
# Responses ordered from highest to lowest rating.
df_sorted = df.sort_values('Rating', ascending=False)
print(df_sorted)

       Category  Rating
0  Satisfaction       5
3    Experience       5
1    Experience       4
2  Satisfaction       3


------------------------------------------------------------------------
**9. IoT and Sensor Data**

Use Case: Manage time series data from sensors for predictive maintenance or monitoring.

Operations:

Analyze average temperature, pressure, or humidity readings.

Identify anomalies in the sensor data over time.

Group sensor readings by location or machine and calculate metrics.

Sort sensor data by timestamp or severity.

In [None]:
# Sample sensor readings; 'Location' names the machine each reading came from.
data = dict(
    Timestamp=['2024-09-01', '2024-09-02', '2024-09-03', '2024-09-01', '2024-09-02'],
    Temperature=[23, 25, 20, 24, 22],
    Pressure=[1012, 1015, 1010, 1013, 1011],
    Location=['Machine1', 'Machine1', 'Machine2', 'Machine2', 'Machine1'],
)

df = pd.DataFrame.from_dict(data)
df

Unnamed: 0,Timestamp,Temperature,Pressure,Location
0,2024-09-01,23,1012,Machine1
1,2024-09-02,25,1015,Machine1
2,2024-09-03,20,1010,Machine2
3,2024-09-01,24,1013,Machine2
4,2024-09-02,22,1011,Machine1


In [None]:
# Overall mean of each sensor channel across every reading.
avg_temp = df['Temperature'].agg('mean')
avg_pressure = df['Pressure'].agg('mean')
print(avg_temp, avg_pressure)

22.8 1012.2


In [None]:
# Treat any reading whose temperature is strictly above 24 as an anomaly
# and keep only those rows.
anomalies = df.loc[df['Temperature'].gt(24)]
anomalies

Unnamed: 0,Timestamp,Temperature,Pressure,Location
1,2024-09-02,25,1015,Machine1


In [None]:

# Mean temperature and pressure per location, as a flat table with
# 'Location' kept as an ordinary column.
grouped_metrics = df.groupby('Location', as_index=False)[['Temperature', 'Pressure']].mean()
grouped_metrics

Unnamed: 0,Location,Temperature,Pressure
0,Machine1,23.333333,1012.666667
1,Machine2,22.0,1011.5


In [None]:
# Readings in ascending order of the 'Timestamp' column.
df_sorted = df.sort_values('Timestamp', ascending=True)
df_sorted

Unnamed: 0,Timestamp,Temperature,Pressure,Location
0,2024-09-01,23,1012,Machine1
3,2024-09-01,24,1013,Machine2
1,2024-09-02,25,1015,Machine1
4,2024-09-02,22,1011,Machine1
2,2024-09-03,20,1010,Machine2


--------------------------------------------------------------------
**10. Social Media Analysis**

Use Case: Perform analysis on social media posts, hashtags, or engagement metrics.

Operations:

Count total likes, shares, or comments per post.

Find the post with the highest engagement.

Calculate average engagement per hashtag or user.

Sort posts based on date, likes, or shares.

In [None]:
# Sample data: engagement counters for three posts.
import numpy as np
data = dict(
    Post=['Post1', 'Post2', 'Post3'],
    Likes=[100, 150, 20],
    Shares=[30, 20, 50],
    Comments=[20, 34, 4],
)

df = pd.DataFrame.from_dict(data)
df

Unnamed: 0,Post,Likes,Shares,Comments
0,Post1,100,30,20
1,Post2,150,20,34
2,Post3,20,50,4


In [None]:
# Count total likes per post.
# Sum 'Likes' within each post and broadcast the total back onto the post's
# rows. With one row per post (as in the sample data) this equals 'Likes',
# but unlike a plain column copy it stays correct when a post spans several
# rows.
df['Total Likes'] = df.groupby('Post')['Likes'].transform('sum')
df

Unnamed: 0,Post,Likes,Shares,Comments,Total Likes
0,Post1,100,30,20,100
1,Post2,150,20,34,150
2,Post3,20,50,4,20


In [None]:
# Find the post with the highest engagement.
# Engagement is the combined count of likes, shares and comments, so rank on
# that sum rather than on likes alone (a heavily shared or discussed post
# would otherwise be overlooked).
engagement_total = df[['Likes', 'Shares', 'Comments']].sum(axis=1)
top_post = df.loc[engagement_total.idxmax()]
top_post

Unnamed: 0,1
Post,Post2
Likes,150
Shares,20
Comments,34
Total Likes,150


In [None]:
# Per-post average of the three engagement counters:
# (likes + shares + comments) divided by 3.
df['Avg Engagement'] = df['Likes'].add(df['Shares']).add(df['Comments']).div(3)
print(df)

    Post  Likes  Shares  Comments  Total Likes  Avg Engagement
0  Post1    100      30        20          100       50.000000
1  Post2    150      20        34          150       68.000000
2  Post3     20      50         4           20       24.666667


In [None]:
# Posts ordered from most to fewest likes.
df_sorted = df.sort_values('Likes', ascending=False)
df_sorted

Unnamed: 0,Post,Likes,Shares,Comments,Total Likes,Avg Engagement
1,Post2,150,20,34,150,68.0
0,Post1,100,30,20,100,50.0
2,Post3,20,50,4,20,24.666667


------------------------------------------------------------------------
**11. Educational Data Analysis**

Use Case: Analyze student performance, attendance, or academic records.

Operations:

Count the number of students per grade or subject.

Find the student with the highest overall GPA.

Calculate average scores per subject.

Sort students based on their grades or attendance rate.

In [None]:
# Sample data: one row per student with grade, subject and GPA.
data = dict(
    Student=['Alice', 'Bob', 'Charlie', 'David', 'Eve'],
    Grade=['A', 'B', 'A', 'C', 'B'],
    Subject=['Math', 'Science', 'Math', 'Science', 'Math'],
    GPA=[3.8, 3.5, 3.9, 3.6, 3.4],
)

df = pd.DataFrame.from_dict(data)
df

Unnamed: 0,Student,Grade,Subject,GPA
0,Alice,A,Math,3.8
1,Bob,B,Science,3.5
2,Charlie,A,Math,3.9
3,David,C,Science,3.6
4,Eve,B,Math,3.4


In [None]:
# Number of student rows for each grade.
grade_count = df.groupby(by='Grade').size()
grade_count

Unnamed: 0_level_0,0
Grade,Unnamed: 1_level_1
A,2
B,2
C,1


In [None]:
# Row of the student with the highest GPA (the first one if several tie).
top_student = df.loc[df['GPA'].idxmax(), :]
top_student

Unnamed: 0,2
Student,Charlie
Grade,A
Subject,Math
GPA,3.9


In [None]:
# Mean GPA of the students in each subject.
average_scores = df.groupby('Subject')['GPA'].agg('mean')
print(average_scores)

Subject
Math       3.70
Science    3.55
Name: GPA, dtype: float64


In [None]:
# Students ordered from highest to lowest GPA.
df_sorted = df.sort_values('GPA', ascending=False)
df_sorted

Unnamed: 0,Student,Grade,Subject,GPA
2,Charlie,A,Math,3.9
0,Alice,A,Math,3.8
3,David,C,Science,3.6
1,Bob,B,Science,3.5
4,Eve,B,Math,3.4


------------------------------------------------------------------
**12. Inventory Management**

Use Case: Analyze inventory data to optimize stock levels.

Operations:

Count total stock per item or category.

Find the item with the highest or lowest stock levels.

Calculate average stock usage per day or week.

Sort items by their restock date or quantity in stock.

In [None]:
# Sample inventory: one row per item. RestockDate is stored as plain text
# and is not zero-padded (e.g. '2024-9-20').
data = dict(
    Item=['Widget A', 'Widget B', 'Widget C', 'Widget D', 'Widget E'],
    Category=['Electronics', 'Electronics', 'Furniture', 'Furniture', 'Electronics'],
    QuantityInStock=[150, 80, 50, 200, 60],
    RestockDate=['2024-10-15', '2024-9-20', '2024-11-1', '2024-10-5', '2024-10-25'],
    StockUsagePerDay=[5, 3, 2, 10, 4],
)
df = pd.DataFrame.from_dict(data)
df

Unnamed: 0,Item,Category,QuantityInStock,RestockDate,StockUsagePerDay
0,Widget A,Electronics,150,2024-10-15,5
1,Widget B,Electronics,80,2024-9-20,3
2,Widget C,Furniture,50,2024-11-1,2
3,Widget D,Furniture,200,2024-10-5,10
4,Widget E,Electronics,60,2024-10-25,4


In [None]:
# Quantity in stock summed per item.
item_stock = df.groupby('Item')['QuantityInStock'].agg('sum')

# Quantity in stock summed per category.
category_stock = df.groupby('Category')['QuantityInStock'].agg('sum')
print(item_stock, category_stock)

Item
Widget A    150
Widget B     80
Widget C     50
Widget D    200
Widget E     60
Name: QuantityInStock, dtype: int64 Category
Electronics    290
Furniture      250
Name: QuantityInStock, dtype: int64


In [None]:
# Row of the item holding the most stock.
highest_stock_item = df.loc[df['QuantityInStock'].idxmax(), :]

# Row of the item holding the least stock.
lowest_stock_item = df.loc[df['QuantityInStock'].idxmin(), :]
print(highest_stock_item)
print(lowest_stock_item)

Item                 Widget D
Category            Furniture
QuantityInStock           200
RestockDate         2024-10-5
StockUsagePerDay           10
Name: 3, dtype: object
Item                 Widget C
Category            Furniture
QuantityInStock            50
RestockDate         2024-11-1
StockUsagePerDay            2
Name: 2, dtype: object


In [None]:
# Daily usage is taken directly from the existing per-day column.
df['AverageStockUsagePerDay'] = df['StockUsagePerDay']

# Weekly usage is the daily figure scaled to seven days.
df['AverageStockUsagePerWeek'] = df['StockUsagePerDay'].mul(7)
print(df.loc[:, ['Item', 'AverageStockUsagePerDay', 'AverageStockUsagePerWeek']])

       Item  AverageStockUsagePerDay  AverageStockUsagePerWeek
0  Widget A                        5                        35
1  Widget B                        3                        21
2  Widget C                        2                        14
3  Widget D                       10                        70
4  Widget E                        4                        28


In [None]:
# Sort items by their restock date.
# 'RestockDate' holds text such as '2024-9-20' and '2024-10-5' that is not
# zero-padded, so a plain string sort puts October before September and
# '2024-10-25' before '2024-10-5'. Sorting with the parsed dates as the key
# gives true chronological order while leaving the column's stored values
# untouched.
sorted_by_restock_date = df.sort_values('RestockDate', key=pd.to_datetime)
print(sorted_by_restock_date)

       Item     Category  QuantityInStock RestockDate  StockUsagePerDay  \
0  Widget A  Electronics              150  2024-10-15                 5   
4  Widget E  Electronics               60  2024-10-25                 4   
3  Widget D    Furniture              200   2024-10-5                10   
2  Widget C    Furniture               50   2024-11-1                 2   
1  Widget B  Electronics               80   2024-9-20                 3   

   AverageStockUsagePerDay  AverageStockUsagePerWeek  
0                        5                        35  
4                        4                        28  
3                       10                        70  
2                        2                        14  
1                        3                        21  
