## Data Manipulation and Analysis with Pandas
Pandas is a powerful library for data manipulation and analysis in Python. It provides data structures and functions to efficiently handle structured data, including tabular data such as spreadsheets and SQL tables.

In [1]:
import pandas as pd 

In [3]:
# Load the dataset from data.csv (path is relative to the working directory).
df = pd.read_csv("data.csv")

In [5]:
# Preview the first five rows to check that the columns loaded as expected.
df.head(5)

Unnamed: 0,Date,Category,Value,Product,Sales,Region
0,2023-01-01,A,28.0,Product1,754.0,East
1,2023-01-02,B,39.0,Product3,110.0,North
2,2023-01-03,C,32.0,Product2,398.0,East
3,2023-01-04,B,8.0,Product1,522.0,East
4,2023-01-05,B,26.0,Product3,869.0,North


In [6]:
# Preview the last five rows to see where the data ends.
df.tail(5)

Unnamed: 0,Date,Category,Value,Product,Sales,Region
45,2023-02-15,B,99.0,Product2,599.0,West
46,2023-02-16,B,6.0,Product1,938.0,South
47,2023-02-17,B,69.0,Product3,143.0,West
48,2023-02-18,C,65.0,Product3,182.0,North
49,2023-02-19,C,11.0,Product3,708.0,North


In [7]:
# Summary statistics for the numeric columns.
# `count` is reported per column and excludes missing values, so it can be
# lower than the number of rows in the frame.
df.describe()

Unnamed: 0,Value,Sales
count,47.0,46.0
mean,51.744681,557.130435
std,29.050532,274.598584
min,2.0,108.0
25%,27.5,339.0
50%,54.0,591.5
75%,70.0,767.5
max,99.0,992.0


In [10]:
# Row-wise flag: True when at least one column in that row is missing.
df.isna().any(axis="columns")

0     False
1     False
2     False
3     False
4     False
5     False
6     False
7     False
8     False
9     False
10    False
11     True
12    False
13    False
14    False
15     True
16    False
17     True
18    False
19    False
20    False
21    False
22    False
23    False
24    False
25    False
26    False
27    False
28     True
29    False
30    False
31    False
32    False
33     True
34    False
35     True
36    False
37     True
38    False
39    False
40    False
41    False
42    False
43    False
44    False
45    False
46    False
47    False
48    False
49    False
dtype: bool

In [11]:
# Number of missing entries in each column.
df.isna().sum()

Date        0
Category    0
Value       3
Product     0
Sales       4
Region      0
dtype: int64

In [14]:
# Derive 'New values' as twice 'Value'. A vectorized multiply replaces the
# per-element apply(lambda): it yields the same numbers without calling a
# Python function for every row, and missing Values remain missing (NaN).
df['New values'] = df['Value'] * 2
df.head()

Unnamed: 0,Date,Category,Value,Product,Sales,Region,New values
0,2023-01-01,A,28.0,Product1,754.0,East,56.0
1,2023-01-02,B,39.0,Product3,110.0,North,78.0
2,2023-01-03,C,32.0,Product2,398.0,East,64.0
3,2023-01-04,B,8.0,Product1,522.0,East,16.0
4,2023-01-05,B,26.0,Product3,869.0,North,52.0


In [18]:
# --- Aggregation and grouping ---
# Re-display the first rows of the frame as the starting point for the
# group-by examples that follow.
df.head(5)

Unnamed: 0,Date,Category,Value,Product,Sales,Region,New values
0,2023-01-01,A,28.0,Product1,754.0,East,56.0
1,2023-01-02,B,39.0,Product3,110.0,North,78.0
2,2023-01-03,C,32.0,Product2,398.0,East,64.0
3,2023-01-04,B,8.0,Product1,522.0,East,16.0
4,2023-01-05,B,26.0,Product3,869.0,North,52.0


In [19]:
# Average 'Value' for each 'Product'.
group_mean = df.groupby('Product')['Value'].mean()
# Leave the result as the cell's last expression so Jupyter renders it as the
# cell output instead of writing plain text to stdout via print().
group_mean

Product
Product1    46.214286
Product2    52.800000
Product3    55.166667
Name: Value, dtype: float64


In [20]:
# Total 'Value' for every (Product, Region) combination; the resulting Series
# is indexed by both grouping keys.
(
    df
    .groupby(["Product", "Region"])["Value"]
    .sum()
)

Product   Region
Product1  East      292.0
          North       9.0
          South     100.0
          West      246.0
Product2  East       56.0
          North     127.0
          South     181.0
          West      428.0
Product3  East      202.0
          North     203.0
          South     215.0
          West      373.0
Name: Value, dtype: float64