In [149]:
import pandas as pd
import numpy as np

# Pandas Theory

# Pandas is an open-source software library for the Python programming language designed for data manipulation and analysis. It provides powerful and flexible data structures, particularly the DataFrame and Series.

# Data Manipulation and Analysis:
### Read & Write Data: Easily load data from various formats like CSV, Excel, SQL databases, and JSON.
### Data Cleaning: Handle missing data (e.g., filling or dropping null values), remove duplicates, and convert columns to the correct format.
### Data Selection and Filtering: Select specific columns, rows, or subsets of data based on conditions.
### Data Transformation: Reshape, Pivot, Merge, or Join DataFrames, as well as apply functions to transform data.
### Aggregation and Grouping: Compute sums, averages, or counts on groups of data with the groupby() function.


In [9]:
# Load the retail sales CSV into a DataFrame and preview its first three rows.
# NOTE(review): this is an absolute, machine-specific path — the cell only runs where that file exists.
csv_path = r"C:\Users\hp\OneDrive\Desktop\retail sales.csv"
df = pd.read_csv(csv_path)
df.head(3)

Unnamed: 0,Date,Product Name,Product Type,Brand,Gender,Category,Country,Quantity,Unit Price ($),Amount ($),Payment Mode
0,2022-05-27,Fear of God Essentials Tee,T-shirt,Essentials,Unisex,Limited Edition,Japan,5,141.82,709.1,Card
1,2022-08-22,Yeezy Boost 350,Sneakers,Adidas,Unisex,Streetwear,USA,2,267.34,534.68,Cash on Delivery
2,2022-04-03,Nike Dunk Low,Sneakers,Nike,Women,Limited Edition,Germany,1,292.0,292.0,Cash on Delivery


# 1.Pandas Series

# A Series is a one-dimensional labeled array that can hold any data type. It corresponds to a single column (or row) of a DataFrame.

In [22]:
# Build a Series from a plain list; with no index given, pandas numbers the entries 0..4.
labels = ["Name", "Age", "Height", "Weight", "Address"]
S = pd.Series(data=labels)
S

0       Name
1        Age
2     Height
3     Weight
4    Address
dtype: object

In [23]:
# Positional form: the second argument of pd.Series is the index,
# so every value is labelled with the matching entry of `labels`.
my_series_2 = ["Aaron", 29, 180, 75, "Austin,Texas"]
Ser = pd.Series(my_series_2, labels)
Ser

Name              Aaron
Age                  29
Height              180
Weight               75
Address    Austin,Texas
dtype: object

In [24]:
# Keyword form: data and index are both named explicitly.
my_Ser = pd.Series(data=my_series_2, index=labels)
my_Ser

Name              Aaron
Age                  29
Height              180
Weight               75
Address    Austin,Texas
dtype: object

In [25]:
# A Series of integers with the default 0..n-1 index.
Ser_1 = pd.Series(data=[10, 20, 30, 400, 500])
Ser_1

0     10
1     20
2     30
3    400
4    500
dtype: int64

In [39]:
# Plain dictionary of string pairs; insertion order is kept.
Paired = dict(
    Mouse='Pad',
    Key='Hole',
    Wired='Wireless',
    Numpy='Pandas',
    Series='DataFrame',
    Laptop='Computer',
    Debt='Equity',
    AI='ML',
)
Paired

{'Mouse': 'Pad',
 'Key': 'Hole',
 'Wired': 'Wireless',
 'Numpy': 'Pandas',
 'Series': 'DataFrame',
 'Laptop': 'Computer',
 'Debt': 'Equity',
 'AI': 'ML'}

In [40]:
# List the dictionary's keys in insertion order.
Paired.keys()

dict_keys(['Mouse', 'Key', 'Wired', 'Numpy', 'Series', 'Laptop', 'Debt', 'AI'])

In [41]:
# Turning a dict into a Series: the keys become the index, the values become the data.
par = pd.Series(data=Paired)
par

Mouse           Pad
Key            Hole
Wired      Wireless
Numpy        Pandas
Series    DataFrame
Laptop     Computer
Debt         Equity
AI               ML
dtype: object

In [49]:
# Positional slice: from the fifth-to-last entry up to, but not including, the last one.
par.iloc[-5:-1]

Numpy        Pandas
Series    DataFrame
Laptop     Computer
Debt         Equity
dtype: object

In [42]:
# A single DataFrame column is itself a Series; keep its first five entries.
# NOTE(review): the variable name `Series` is easy to confuse with pd.Series.
Series = df['Brand'].head(5)
Series

0    Essentials
1        Adidas
2          Nike
3          Nike
4          Nike
Name: Brand, dtype: object

# 2. Series Operations

In [81]:
# Selecting one column with df[...] gives back a pandas Series.
type(df['Product Type'])

pandas.core.series.Series

In [52]:
# Display the whole 'Product Type' column (long output is truncated in the middle).
df['Product Type']

0       T-shirt
1      Sneakers
2      Sneakers
3        Hoodie
4      Sneakers
         ...   
348    Sneakers
349      Hoodie
350     Joggers
351    Sneakers
352    Sneakers
Name: Product Type, Length: 353, dtype: object

In [55]:
# Boolean mask: keep only sneaker rows, then show the first two matches.
is_sneaker = df['Product Type'] == 'Sneakers'
df.loc[is_sneaker].head(2)

Unnamed: 0,Date,Product Name,Product Type,Brand,Gender,Category,Country,Quantity,Unit Price ($),Amount ($),Payment Mode
1,2022-08-22,Yeezy Boost 350,Sneakers,Adidas,Unisex,Streetwear,USA,2,267.34,534.68,Cash on Delivery
2,2022-04-03,Nike Dunk Low,Sneakers,Nike,Women,Limited Edition,Germany,1,292.0,292.0,Cash on Delivery


In [98]:
# Quantities of the rows where more than two units were sold (row filter and column pick in one .loc call).
df.loc[df['Quantity'] > 2, 'Quantity']

0      5
3      4
8      3
9      4
12     3
      ..
348    5
349    5
350    4
351    5
352    5
Name: Quantity, Length: 225, dtype: int64

In [75]:
# First five unit prices.
df['Unit Price ($)'].head(5)

0    141.82
1    267.34
2    292.00
3    245.44
4    184.51
Name: Unit Price ($), dtype: float64

In [83]:
# Arithmetic on a Series is element-wise: multiply each of the first four unit prices by 0.04.
first_prices = df['Unit Price ($)'].head(4)
first_prices * 0.04

0     5.6728
1    10.6936
2    11.6800
3     9.8176
Name: Unit Price ($), dtype: float64

In [85]:
# First four sale amounts.
df['Amount ($)'].head(4)

0    709.10
1    534.68
2    292.00
3    981.76
Name: Amount ($), dtype: float64

In [88]:
# Element-wise division: each of the first four amounts divided by 0.05.
first_amounts = df['Amount ($)'].head(4)
first_amounts / 0.05

0    14182.0
1    10693.6
2     5840.0
3    19635.2
Name: Amount ($), dtype: float64

In [89]:
# Column labels of the sales DataFrame.
df.columns

Index(['Date', 'Product Name', 'Product Type', 'Brand', 'Gender', 'Category',
       'Country', 'Quantity', 'Unit Price ($)', 'Amount ($)', 'Payment Mode'],
      dtype='object')

In [91]:
# First four entries of the 'Gender' column (bracket access instead of attribute access).
df['Gender'].head(4)

0    Unisex
1    Unisex
2     Women
3       Men
Name: Gender, dtype: object

In [99]:
# Count how many rows each brand has; the result is a Series indexed by brand,
# ordered from the most to the least frequent.
myseries=df['Brand'].value_counts()
myseries

Brand
Nike          107
Adidas         70
Off-White      48
Puma           41
Supreme        32
New Era        29
Essentials     26
Name: count, dtype: int64

In [100]:
# Index of the counts Series: the distinct brand names.
myseries.index

Index(['Nike', 'Adidas', 'Off-White', 'Puma', 'Supreme', 'New Era',
       'Essentials'],
      dtype='object', name='Brand')

In [101]:
# Values of the counts Series as a NumPy array (the per-brand counts, without labels).
myseries.values

array([107,  70,  48,  41,  32,  29,  26])

# 3.Pandas DataFrame

# DataFrame: A two-dimensional data structure with labeled axes (rows and columns). It is similar to a spreadsheet or an SQL table.

In [104]:
# Column-oriented input for a DataFrame: each key is a column name and each value
# holds that column's six entries (a mix of Series and plain lists).
Data = {
    'Name': pd.Series(data=['Aaron', 'Michael', 'Jack', 'Donald', 'Sam', 'Smith']),
    'Age': [29, 40, 80, 24, 12, 52],
    'Height (cm)': [180, 160, 171, 173, 189, 190],
    'Job': [
        'Data Analyst',
        'Accountant',
        'Shopkeeper',
        'IT Technician',
        'HR Manager',
        'Mayor',
    ],
    'City': pd.Series(data=['Austin', 'Paris', 'New York', 'Dallas', 'Kerala', 'Kentucky']),
}
Data

{'Name': 0      Aaron
 1    Michael
 2       Jack
 3     Donald
 4        Sam
 5      Smith
 dtype: object,
 'Age': [29, 40, 80, 24, 12, 52],
 'Height (cm)': [180, 160, 171, 173, 189, 190],
 'Job': ['Data Analyst',
  'Accountant',
  'Shopkeeper',
  'IT Technician',
  'HR Manager',
  'Mayor'],
 'City': 0      Austin
 1       Paris
 2    New York
 3      Dallas
 4      Kerala
 5    Kentucky
 dtype: object}

In [105]:
# Build the demo DataFrame: one column per dictionary key.
Frame = pd.DataFrame(data=Data)
Frame

Unnamed: 0,Name,Age,Height (cm),Job,City
0,Aaron,29,180,Data Analyst,Austin
1,Michael,40,160,Accountant,Paris
2,Jack,80,171,Shopkeeper,New York
3,Donald,24,173,IT Technician,Dallas
4,Sam,12,189,HR Manager,Kerala
5,Smith,52,190,Mayor,Kentucky


In [106]:
# Passing a list of labels selects several columns and returns a DataFrame.
Frame.loc[:, ['Name', 'Job']]

Unnamed: 0,Name,Job
0,Aaron,Data Analyst
1,Michael,Accountant
2,Jack,Shopkeeper
3,Donald,IT Technician
4,Sam,HR Manager
5,Smith,Mayor


In [107]:
# First five rows of the sales data.
df.head(5)

Unnamed: 0,Date,Product Name,Product Type,Brand,Gender,Category,Country,Quantity,Unit Price ($),Amount ($),Payment Mode
0,2022-05-27,Fear of God Essentials Tee,T-shirt,Essentials,Unisex,Limited Edition,Japan,5,141.82,709.1,Card
1,2022-08-22,Yeezy Boost 350,Sneakers,Adidas,Unisex,Streetwear,USA,2,267.34,534.68,Cash on Delivery
2,2022-04-03,Nike Dunk Low,Sneakers,Nike,Women,Limited Edition,Germany,1,292.0,292.0,Cash on Delivery
3,2022-07-13,Nike Tech Fleece,Hoodie,Nike,Men,Limited Edition,USA,4,245.44,981.76,Cash on Delivery
4,2022-08-25,Jordan 1 High,Sneakers,Nike,Women,Casual,Australia,2,184.51,369.02,Cash on Delivery


In [109]:
# Two columns, first five rows.
df.loc[:, ['Gender', 'Country']].head(5)

Unnamed: 0,Gender,Country
0,Unisex,Japan
1,Unisex,USA
2,Women,Germany
3,Men,USA
4,Women,Australia


In [111]:
# Three columns for every row (pandas truncates the display of long frames).
selected_columns = ['Brand', 'Gender', 'Country']
df.loc[:, selected_columns]

Unnamed: 0,Brand,Gender,Country
0,Essentials,Unisex,Japan
1,Adidas,Unisex,USA
2,Nike,Women,Germany
3,Nike,Men,USA
4,Nike,Women,Australia
...,...,...,...
348,Nike,Men,India
349,Nike,Women,Germany
350,Puma,Women,Germany
351,Adidas,Men,Canada


# 4. DataFrame Operations

In [113]:
# Display the whole demo frame for reference.
Frame

Unnamed: 0,Name,Age,Height (cm),Job,City
0,Aaron,29,180,Data Analyst,Austin
1,Michael,40,160,Accountant,Paris
2,Jack,80,171,Shopkeeper,New York
3,Donald,24,173,IT Technician,Dallas
4,Sam,12,189,HR Manager,Kerala
5,Smith,52,190,Mayor,Kentucky


In [114]:
# Keep only the people older than 30.
older_than_30 = Frame['Age'] > 30
Frame.loc[older_than_30]

Unnamed: 0,Name,Age,Height (cm),Job,City
1,Michael,40,160,Accountant,Paris
2,Jack,80,171,Shopkeeper,New York
5,Smith,52,190,Mayor,Kentucky


In [117]:
# People strictly older than 20 and strictly younger than 50 (both bounds excluded).
Frame[Frame['Age'].between(20, 50, inclusive='neither')]

Unnamed: 0,Name,Age,Height (cm),Job,City
0,Aaron,29,180,Data Analyst,Austin
1,Michael,40,160,Accountant,Paris
3,Donald,24,173,IT Technician,Dallas


In [120]:
# Rows whose City is exactly 'New York'.
lives_in_new_york = Frame['City'] == 'New York'
Frame.loc[lives_in_new_york]

Unnamed: 0,Name,Age,Height (cm),Job,City
2,Jack,80,171,Shopkeeper,New York


In [122]:
# All sales made in the USA.
sold_in_usa = df['Country'] == 'USA'
df.loc[sold_in_usa]

Unnamed: 0,Date,Product Name,Product Type,Brand,Gender,Category,Country,Quantity,Unit Price ($),Amount ($),Payment Mode
1,2022-08-22,Yeezy Boost 350,Sneakers,Adidas,Unisex,Streetwear,USA,2,267.34,534.68,Cash on Delivery
3,2022-07-13,Nike Tech Fleece,Hoodie,Nike,Men,Limited Edition,USA,4,245.44,981.76,Cash on Delivery
8,2022-06-20,Jordan 1 High,Sneakers,Nike,Women,Streetwear,USA,3,199.47,598.41,Wallet
10,2022-06-25,Yeezy Boost 350,Sneakers,Adidas,Men,Limited Edition,USA,1,117.18,117.18,Cash on Delivery
14,2022-08-25,Adidas Ultraboost,Sneakers,Adidas,Unisex,Casual,USA,1,104.16,104.16,Wallet
22,2022-05-07,New Era Cap,Cap,New Era,Men,Sportswear,USA,3,295.25,885.75,Wallet
30,2022-04-26,Off-White Hoodie,Hoodie,Off-White,Unisex,Streetwear,USA,4,258.82,1035.28,Cash on Delivery
31,2022-02-24,Nike Dunk Low,Sneakers,Nike,Women,Sportswear,USA,2,203.05,406.1,Cash on Delivery
49,2022-08-21,Nike Dunk Low,Sneakers,Nike,Women,Streetwear,USA,5,183.55,917.75,Cash on Delivery
66,2022-08-04,Nike Tech Fleece,Hoodie,Nike,Women,Limited Edition,USA,2,269.88,539.76,Cash on Delivery


In [123]:
# Combine two conditions with & (each wrapped in its own parentheses):
# rows sold in the USA whose product type is Sneakers.
# The closing bracket of the indexing expression was missing, which is a SyntaxError.
df[(df['Country']=='USA') & (df['Product Type']=='Sneakers')]

Unnamed: 0,Date,Product Name,Product Type,Brand,Gender,Category,Country,Quantity,Unit Price ($),Amount ($),Payment Mode
1,2022-08-22,Yeezy Boost 350,Sneakers,Adidas,Unisex,Streetwear,USA,2,267.34,534.68,Cash on Delivery
8,2022-06-20,Jordan 1 High,Sneakers,Nike,Women,Streetwear,USA,3,199.47,598.41,Wallet
10,2022-06-25,Yeezy Boost 350,Sneakers,Adidas,Men,Limited Edition,USA,1,117.18,117.18,Cash on Delivery
14,2022-08-25,Adidas Ultraboost,Sneakers,Adidas,Unisex,Casual,USA,1,104.16,104.16,Wallet
31,2022-02-24,Nike Dunk Low,Sneakers,Nike,Women,Sportswear,USA,2,203.05,406.1,Cash on Delivery
49,2022-08-21,Nike Dunk Low,Sneakers,Nike,Women,Streetwear,USA,5,183.55,917.75,Cash on Delivery
79,2022-08-31,Yeezy Boost 350,Sneakers,Adidas,Men,Limited Edition,USA,1,56.49,56.49,UPI
86,2022-06-06,Jordan 1 High,Sneakers,Nike,Women,Limited Edition,USA,5,299.3,1496.5,Card
87,2022-08-29,Yeezy Boost 350,Sneakers,Adidas,Unisex,Streetwear,USA,4,162.85,651.4,UPI
113,2022-01-24,Adidas Ultraboost,Sneakers,Adidas,Men,Limited Edition,USA,3,101.39,304.17,Cash on Delivery


In [126]:
# Filter to USA sneaker sales, keep just the two columns used in the filter,
# and show the first five matches.
usa_sneakers = (df['Country'] == 'USA') & (df['Product Type'] == 'Sneakers')
df.loc[usa_sneakers, ['Country', 'Product Type']].head(5)

Unnamed: 0,Country,Product Type
1,USA,Sneakers
8,USA,Sneakers
10,USA,Sneakers
14,USA,Sneakers
31,USA,Sneakers


In [128]:
# Summary statistics (count, mean, std, min, quartiles, max) for the numeric columns.
df.describe()

Unnamed: 0,Quantity,Unit Price ($),Amount ($)
count,353.0,353.0,353.0
mean,3.107649,174.607054,547.034193
std,1.408083,71.62571,343.084461
min,1.0,50.1,50.1
25%,2.0,113.75,268.78
50%,3.0,175.51,494.4
75%,4.0,235.5,783.93
max,5.0,299.73,1496.5


In [130]:
# Print each column's non-null count and dtype, plus the frame's memory usage.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 353 entries, 0 to 352
Data columns (total 11 columns):
 #   Column          Non-Null Count  Dtype  
---  ------          --------------  -----  
 0   Date            353 non-null    object 
 1   Product Name    353 non-null    object 
 2   Product Type    353 non-null    object 
 3   Brand           353 non-null    object 
 4   Gender          353 non-null    object 
 5   Category        353 non-null    object 
 6   Country         353 non-null    object 
 7   Quantity        353 non-null    int64  
 8   Unit Price ($)  353 non-null    float64
 9   Amount ($)      353 non-null    float64
 10  Payment Mode    353 non-null    object 
dtypes: float64(2), int64(1), object(8)
memory usage: 30.5+ KB


In [137]:
# First three rows of the demo frame.
Frame.head(3)

Unnamed: 0,Name,Age,Height (cm),Job,City
0,Aaron,29,180,Data Analyst,Austin
1,Michael,40,160,Accountant,Paris
2,Jack,80,171,Shopkeeper,New York


In [136]:
# drop() returns a new frame without the listed columns; Frame itself is not modified
# because the result is not assigned back.
columns_to_drop = ['Age', 'Height (cm)']
Frame.drop(labels=columns_to_drop, axis='columns').head(3)

Unnamed: 0,Name,Job,City
0,Aaron,Data Analyst,Austin
1,Michael,Accountant,Paris
2,Jack,Shopkeeper,New York


In [143]:
# Column totals for the two monetary columns, shown as a one-column DataFrame.
money_columns = ['Amount ($)', 'Unit Price ($)']
totals = df[money_columns].sum()
totals.to_frame()

Unnamed: 0,0
Amount ($),193103.07
Unit Price ($),61636.29


In [146]:
# Display Frame again: the earlier drop() result was never assigned back, so all columns are still present.
Frame

Unnamed: 0,Name,Age,Height (cm),Job,City
0,Aaron,29,180,Data Analyst,Austin
1,Michael,40,160,Accountant,Paris
2,Jack,80,171,Shopkeeper,New York
3,Donald,24,173,IT Technician,Dallas
4,Sam,12,189,HR Manager,Kerala
5,Smith,52,190,Mayor,Kentucky
