In [None]:
import sqlite3
import pandas as pd

# Load the Myntra product sheet; the analysis cells visible below read from `df`.
df = pd.read_excel("myntra.xlsx")

# Mirror the DataFrame into a local SQLite table and verify the load.
# The connection is closed in `finally` so the database file handle is
# released even if the write or the verification fails.
connection = sqlite3.connect('my_database.db')
try:
    # if_exists='replace' recreates the table, so re-running this cell does
    # not append duplicate rows.
    df.to_sql('myntra', connection, if_exists='replace', index=False)

    cursor = connection.cursor()

    # Check the load by comparing row counts rather than dumping the whole
    # table into the cell output.
    cursor.execute("SELECT COUNT(*) FROM myntra")
    (row_count,) = cursor.fetchone()
    assert row_count == len(df), (
        f"expected {len(df)} rows in 'myntra', found {row_count}"
    )
    print(f"Loaded {row_count} rows into table 'myntra'")

    # Fetch and display a small sample of the stored rows.
    cursor.execute("SELECT * FROM myntra LIMIT 5")
    rows = cursor.fetchall()
    for row in rows:
        print(row)
finally:
    connection.close()




In [4]:
# Collect the DataFrame's column labels into a plain Python list.
column_names = df.columns.to_list()
# Bare trailing expression: the notebook renders the list as the cell output.
column_names

['Product ID',
 'Category',
 'Sub-category',
 'Product Name',
 'Brand Name',
 'Size',
 'Color',
 'Ratings']

In [5]:
# Heading first, then the leading rows of the dataset as plain text.
print("First few rows of the dataset:")
leading_rows = df.head()
print(leading_rows)

First few rows of the dataset:
  Product ID Category Sub-category Product Name Brand Name Size  Color  \
0      P0001    Women      Topwear     T-Shirts   Roadster   40  Black   
1      P0002    Women      Topwear     T-Shirts   Roadster   40   Blue   
2      P0003    Women      Topwear     T-Shirts   Roadster   42  White   
3      P0004    Women      Topwear     T-Shirts   Roadster   44  Green   
4      P0005    Women      Topwear     T-Shirts       Puma   38  Olive   

   Ratings  
0        4  
1        5  
2        3  
3        2  
4        5  


In [6]:
# Descriptive statistics (count, mean, std, quartiles, min/max) for the
# 'Ratings' column, printed under a heading.
print("\nSummary statistics for Ratings:")
ratings_summary = df['Ratings'].describe()
print(ratings_summary)


Summary statistics for Ratings:
count    3071.000000
mean        4.002931
std         0.971411
min         2.000000
25%         3.000000
50%         4.000000
75%         5.000000
max         5.000000
Name: Ratings, dtype: float64


In [7]:
print("\nCount of products by Category and Sub-category:")
# Number of rows per (Category, Sub-category) pair, flattened into a frame
# whose count column is named 'Count'.
pair_sizes = df.groupby(['Category', 'Sub-category']).size()
category_counts = pair_sizes.rename('Count').reset_index()
print(category_counts)


Count of products by Category and Sub-category:
   Category          Sub-category  Count
0    Beauty              Haircare    139
1    Beauty                Makeup    323
2    Beauty              Skincare    158
3      Kids         Boys Clothing    108
4      Kids              Footwear    100
5      Kids        Girls Clothing    179
6      Kids               Infants     82
7       Men            Bottomwear    377
8       Men              Footwear    209
9       Men               Topwear    403
10    Women              Footwear    241
11    Women  Indian & Fusion Wear    312
12    Women               Topwear     60
13    Women          Western Wear    380


In [8]:
print("\nAverage Ratings by Brand:")
# Mean rating per brand as a two-column frame, ordered from the highest
# average to the lowest.
average_ratings_by_brand = (
    df.groupby('Brand Name')['Ratings']
    .mean()
    .reset_index()
    .sort_values(by='Ratings', ascending=False)
)
print(average_ratings_by_brand)


Average Ratings by Brand:
                   Brand Name   Ratings
6                      Campus  4.428571
59                    Sunsilk  4.409091
35                      Metro  4.375000
9                       Crocs  4.333333
60               Swiss Beauty  4.315789
..                        ...       ...
63  United Colors of Benetton  3.750000
21                    Krayons  3.736842
44                       ONLY  3.714286
22                   La Zoire  3.666667
70                   Woodland  3.411765

[72 rows x 2 columns]


In [9]:
# Five rows with the largest 'Ratings', restricted to the descriptive columns.
# NOTE(review): nlargest keeps the first occurrences on ties, so when many
# products share the maximum rating this is simply the first five of them in
# file order — confirm that is the intended meaning of "top rated".
product_columns = ['Product Name', 'Brand Name', 'Ratings']
top_rated_products = df[product_columns].nlargest(5, 'Ratings')
print("\nTop Rated Products:")
print(top_rated_products)


Top Rated Products:
   Product Name Brand Name  Ratings
1      T-Shirts   Roadster        5
4      T-Shirts       Puma        5
7      T-Shirts       Puma        5
10     T-Shirts       Puma        5
11     T-Shirts       Puma        5


In [10]:
print("\nCount of Products per Brand:")
# Occurrences of each brand, most frequent first, as a frame with the
# columns 'Brand Name' and 'Count'.
brand_counts = (
    df['Brand Name']
    .value_counts()
    .rename_axis('Brand Name')
    .reset_index(name='Count')
)
print(brand_counts)


Count of Products per Brand:
     Brand Name  Count
0           H&M    266
1      Roadster    218
2          Puma    213
3      Here&Now    137
4   Allen Solly    121
..          ...    ...
67    Moms Home      6
68      Set Wet      6
69    Hopscotch      6
70      Catwalk      4
71     La Zoire      3

[72 rows x 2 columns]


In [11]:
print("\nProduct Distribution by Size for Each Brand:")
# Count rows per (Brand Name, Size) pair, then pivot the sizes into columns;
# brand/size combinations that never occur are filled with 0.
brand_size_pairs = df.groupby(['Brand Name', 'Size']).size()
size_distribution = brand_size_pairs.unstack(level='Size', fill_value=0)
print(size_distribution)


Product Distribution by Size for Each Brand:
Size         26  28  30  32  34  36  38  40  42  44  ...  9 UK  9-12M   L   M  \
Brand Name                                           ...                        
Adidas        0   0   0   4  17   7  20   6   5  16  ...     7      0   0   0   
Allen Solly   0   5   5   6   7  15   9   9  20   5  ...     0      0   0  10   
Bata          0   0   0   0   0   0   0   0   0   0  ...     1      7   0   0   
Biba          0   0   0   0   0   0   0   0   0   0  ...     0      0   6  31   
Biotique      0   0   0   0   0   0   0   0   0   0  ...     0      0   0   0   
...          ..  ..  ..  ..  ..  ..  ..  ..  ..  ..  ...   ...    ...  ..  ..   
Vera Moda     0   0   1  11   0   6   0   0   0   0  ...     0      0   0   0   
W             0   0   0   0   6  10   0   0   0   0  ...     0      0  10   0   
Wildstone     0   0   0   0   0   0   0   0   0   0  ...     0      0   0   0   
Woodland      0   0   0   0   0   0   6   0   4   0  ...     2 

In [12]:
# Ratings strictly below this value count as low.
rating_threshold = 3.0
print(f"\nItems with Ratings Below {rating_threshold}:")
# Boolean mask of low-rated rows, applied together with the column selection.
is_low_rated = df['Ratings'].lt(rating_threshold)
low_rated_items = df.loc[is_low_rated, ['Product Name', 'Brand Name', 'Ratings']]
print(low_rated_items)


Items with Ratings Below 3.0:
     Product Name              Brand Name  Ratings
3        T-Shirts                Roadster        2
9        T-Shirts                    Puma        2
18       T-Shirts                   Wrogn        2
38       T-Shirts  HRX by Hrithik Roshan         2
46       T-Shirts                  Adidas        2
...           ...                     ...      ...
3020        Shoes                 Liberty        2
3038      Sandals                 Liberty        2
3046      Sandals                    Puma        2
3049      Sandals                    Puma        2
3058      Sandals                    Bata        2

[313 rows x 3 columns]


In [14]:
print("\nTop Brands by Average Rating:")
# This is the same per-brand mean-rating ranking already built as
# `average_ratings_by_brand` in an earlier cell, so reuse it instead of
# repeating the groupby/sort. The copy keeps `top_brands` an independent
# frame, as it was when it was computed separately.
top_brands = average_ratings_by_brand.copy()
print(top_brands)


Top Brands by Average Rating:
                   Brand Name   Ratings
6                      Campus  4.428571
59                    Sunsilk  4.409091
35                      Metro  4.375000
9                       Crocs  4.333333
60               Swiss Beauty  4.315789
..                        ...       ...
63  United Colors of Benetton  3.750000
21                    Krayons  3.736842
44                       ONLY  3.714286
22                   La Zoire  3.666667
70                   Woodland  3.411765

[72 rows x 2 columns]
