# Pandas in Python

Pandas is a software library written for the Python programming language for data manipulation and analysis. In particular, it offers data structures and operations for manipulating numerical tables and time series. It is free software released under the three-clause BSD license.

In [None]:
!pip install pandas 

In [1]:
import pandas as pd

### Creating DataFrames

#### From Lists

In [2]:
# Build a DataFrame from a list of rows. No column names are supplied, so
# the columns get the default integer labels 0 and 1.
people_rows = [
    ['Bob', 55],
    ['Vien', 52],
]
df = pd.DataFrame(people_rows)

In [3]:
# A bare expression on the last line makes the notebook render the frame as a table.
df

Unnamed: 0,0,1
0,Bob,55
1,Vien,52


In [4]:
# Check the class of the object that pd.DataFrame(...) produced.
type(df)

pandas.core.frame.DataFrame

In [5]:
# print() emits the plain-text representation instead of the rendered table.
print(df)

      0   1
0   Bob  55
1  Vien  52


In [6]:
# Load a CSV file into a DataFrame (the path is relative to the working directory).
# NOTE: this rebinds `df`, replacing the frame that was built from lists.
df = pd.read_csv('Book1.csv')

Other file formats have analogous readers, for example `pd.read_excel()` and `pd.read_json()`.

In [7]:
# Inspect the frame loaded from Book1.csv.
df

Unnamed: 0,Revenue,profit
0,3000,6000
1,4000,10000
2,5000,15000


## Indexing in DataFrame

In [8]:
# Indexing with a single column label returns that column as a Series.
df['Revenue']

0    3000
1    4000
2    5000
Name: Revenue, dtype: int64

In [9]:
# Indexing with a list of labels returns a DataFrame containing those columns.
df[['Revenue','profit']]

Unnamed: 0,Revenue,profit
0,3000,6000
1,4000,10000
2,5000,15000


In [10]:
# Dimensions of the frame as a (rows, columns) tuple.
df.shape

(3, 2)

In [11]:
# First rows of the frame (at most 5 by default; this frame only has 3).
df.head()

Unnamed: 0,Revenue,profit
0,3000,6000
1,4000,10000
2,5000,15000


In [12]:
# Last rows of the frame (at most 5 by default; this frame only has 3).
df.tail()

Unnamed: 0,Revenue,profit
0,3000,6000
1,4000,10000
2,5000,15000


In [13]:
# Column labels, returned as a pandas Index.
df.columns

Index(['Revenue', 'profit'], dtype='object')

In [16]:
# Largest value in the Revenue column.
df['Revenue'].max()

5000

In [17]:
# Sample standard deviation (ddof=1) of the Revenue column.
df['Revenue'].std()

1000.0

In [18]:
# Mean of each column, returned as a Series indexed by column name.
df.mean()

Revenue     4000.000000
profit     10333.333333
dtype: float64

In [19]:
# Summary statistics per column: count, mean, std, min, quartiles and max.
df.describe()

Unnamed: 0,Revenue,profit
count,3.0,3.0
mean,4000.0,10333.333333
std,1000.0,4509.249753
min,3000.0,6000.0
25%,3500.0,8000.0
50%,4000.0,10000.0
75%,4500.0,12500.0
max,5000.0,15000.0


In [20]:
# Returns a new frame indexed by 'profit'. The result is only displayed, not
# assigned, so `df` itself keeps its original index.
df.set_index('profit')

Unnamed: 0_level_0,Revenue
profit,Unnamed: 1_level_1
6000,3000
10000,4000
15000,5000


In [27]:
# Column-oriented weather observations keyed by column name; every list
# holds eight entries, one per day.
weather_data = dict(
    Day=[
        '23/4/2020', '24/4/2020', '25/4/2020', '26/4/2020',
        '27/4/2020', '28/4/2020', '29/4/2020', '30/4/2020',
    ],
    Temperature=[32, 35, 28, 29, 30, 31, 27, 28],
    Windspeed=[6, 7, 2, 7, 2, 6, 3, 4],
    Event=[
        'Sunny', 'Rain', 'Rain', 'Rain',
        'Snow', 'Sunny', 'Snow', 'Sunny',
    ],
)

In [28]:
# Build a DataFrame from the dict: each key becomes a column.
# NOTE: this rebinds `df`; the Revenue/profit frame is no longer reachable through it.
df = pd.DataFrame(weather_data)

In [29]:
# Inspect the weather frame.
df

Unnamed: 0,Day,Temperature,Windspeed,Event
0,23/4/2020,32,6,Sunny
1,24/4/2020,35,7,Rain
2,25/4/2020,28,2,Rain
3,26/4/2020,29,7,Rain
4,27/4/2020,30,2,Snow
5,28/4/2020,31,6,Sunny
6,29/4/2020,27,3,Snow
7,30/4/2020,28,4,Sunny


In [32]:
# Make 'Event' the row index so rows can be looked up by event label.
# Reassigning the result is preferred over inplace=True.
df = df.set_index('Event')

In [33]:
# 'Event' is now the index rather than a regular column.
df

Unnamed: 0_level_0,Day,Temperature,Windspeed
Event,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Sunny,23/4/2020,32,6
Rain,24/4/2020,35,7
Rain,25/4/2020,28,2
Rain,26/4/2020,29,7
Snow,27/4/2020,30,2
Sunny,28/4/2020,31,6
Snow,29/4/2020,27,3
Sunny,30/4/2020,28,4


In [34]:
# Label-based lookup on the index: returns every row whose index label is 'Rain'.
df.loc['Rain']

Unnamed: 0_level_0,Day,Temperature,Windspeed
Event,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Rain,24/4/2020,35,7
Rain,25/4/2020,28,2
Rain,26/4/2020,29,7


In [35]:
# Move 'Event' back to a regular column and restore the default integer index.
# Reassigning the result is preferred over inplace=True.
df = df.reset_index()

In [36]:
# 'Event' is a regular column again and the index is back to 0..7.
df

Unnamed: 0,Event,Day,Temperature,Windspeed
0,Sunny,23/4/2020,32,6
1,Rain,24/4/2020,35,7
2,Rain,25/4/2020,28,2
3,Rain,26/4/2020,29,7
4,Snow,27/4/2020,30,2
5,Sunny,28/4/2020,31,6
6,Snow,29/4/2020,27,3
7,Sunny,30/4/2020,28,4


### Methods and Attributes in DataFrame

Max and Min

In [None]:
# `df` now holds the weather data, which has no 'Revenue' column, so the
# maximum is taken over the numeric 'Temperature' column instead.
df['Temperature'].max()

In [None]:
# `df` now holds the weather data, which has no 'Revenue' column, so the
# minimum is taken over the numeric 'Temperature' column instead.
df['Temperature'].min()

Describing the dataframe

In [None]:
# Summary statistics for the numeric columns of `df`.
df.describe()

In [None]:
# Print the column labels of `df`.
print(df.columns)

In [37]:
# Dimensions of the frame as a (rows, columns) tuple.
df.shape

(8, 4)

In [38]:
# Total number of cells, i.e. rows multiplied by columns.
df.size

32

In [39]:
# The frame's data as a NumPy array (object dtype here because the columns mix strings and integers).
df.values

array([['Sunny', '23/4/2020', 32, 6],
       ['Rain', '24/4/2020', 35, 7],
       ['Rain', '25/4/2020', 28, 2],
       ['Rain', '26/4/2020', 29, 7],
       ['Snow', '27/4/2020', 30, 2],
       ['Sunny', '28/4/2020', 31, 6],
       ['Snow', '29/4/2020', 27, 3],
       ['Sunny', '30/4/2020', 28, 4]], dtype=object)

#### Slicing a DataFrame using a comparison operator

In [41]:
# The comparison yields a boolean Series; indexing with it keeps only the
# rows whose Temperature is greater than 32.
df[df['Temperature']>32]

Unnamed: 0,Event,Day,Temperature,Windspeed
1,Rain,24/4/2020,35,7


### Converting DataFrame to NumPy ndarray

In [42]:
# DataFrame.as_matrix() was deprecated and later removed from pandas;
# to_numpy() is the supported way to get the data as a NumPy ndarray.
df.to_numpy()

  """Entry point for launching an IPython kernel.


array([['Sunny', '23/4/2020', 32, 6],
       ['Rain', '24/4/2020', 35, 7],
       ['Rain', '25/4/2020', 28, 2],
       ['Rain', '26/4/2020', 29, 7],
       ['Snow', '27/4/2020', 30, 2],
       ['Sunny', '28/4/2020', 31, 6],
       ['Snow', '29/4/2020', 27, 3],
       ['Sunny', '30/4/2020', 28, 4]], dtype=object)

## Importing a CSV file in a customized format

In [None]:
# Read iris.csv with the default options (the first line is used as the header row).
iris_data = pd.read_csv('iris.csv')

In [None]:
# Preview the first rows as plain text.
print(iris_data.head())

In [None]:
# Re-read the file with custom settings: skip the first line (presumably the
# file's own header row -- confirm against iris.csv), keep only the first four
# columns, and label them with explicit names.
iris_data = pd.read_csv('iris.csv', skiprows=1, usecols=[0, 1, 2, 3],
                        names=['Sepal-Length', 'Sepal-Width', 'Petal-Length', 'Petal-Width'])

In [None]:
# Preview the first rows of the re-read frame as plain text.
print(iris_data.head())