# Pandas Basics 

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

In [2]:
df=pd.DataFrame({'Name': ['Ninad','Ajinkya','Roman'],
    'Age':[21,43,23],
    'Location':['Wadala','Dombivili','Chembur'],
                'Salary':[15000,32000,25000]})

#DataFrame is simply like a excel sheet

In [3]:
df

Unnamed: 0,Name,Age,Location,Salary
0,Ninad,21,Wadala,15000
1,Ajinkya,43,Dombivili,32000
2,Roman,23,Chembur,25000


In [4]:
df.columns

Index(['Name', 'Age', 'Location', 'Salary'], dtype='object')

In [5]:
df.index

RangeIndex(start=0, stop=3, step=1)

## To convert DataFrame file in csv

In [6]:
#to convert this file in csv

df.to_csv('NewFile.csv')
#it will be created in the same folder

In [7]:
df.describe()  #stats about numerical column

Unnamed: 0,Age,Salary
count,3.0,3.0
mean,29.0,24000.0
std,12.165525,8544.003745
min,21.0,15000.0
25%,22.0,20000.0
50%,23.0,25000.0
75%,33.0,28500.0
max,43.0,32000.0


In [8]:
df.count()

Name        3
Age         3
Location    3
Salary      3
dtype: int64

## Change specific value from dataframe

In [9]:
A=df['Salary'][0]
print(A)
#change value from dataframe

B=df['Salary'][0]=17000
print(B)

15000
17000


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  B=df['Salary'][0]=17000


In [10]:
df

Unnamed: 0,Name,Age,Location,Salary
0,Ninad,21,Wadala,17000
1,Ajinkya,43,Dombivili,32000
2,Roman,23,Chembur,25000


In [11]:
#to remove index 

df.to_csv('NewFile1.csv',index=False)
# df will not show index values in csv file

In [12]:
#to change index values

df.index=[1,2,3]
df

Unnamed: 0,Name,Age,Location,Salary
1,Ninad,21,Wadala,17000
2,Ajinkya,43,Dombivili,32000
3,Roman,23,Chembur,25000


## Multiple Series makes one DataFrame

In [13]:
C=pd.Series(np.random.rand(7))
C

0    0.723877
1    0.429061
2    0.334605
3    0.486212
4    0.602286
5    0.949713
6    0.715659
dtype: float64

In [14]:
type(C)

pandas.core.series.Series

In [15]:
index=[1,2,3,4,5]
D=pd.DataFrame(np.random.rand(5,3),index=index)
D

Unnamed: 0,0,1,2
1,0.519148,0.585748,0.471884
2,0.570717,0.501869,0.150706
3,0.452569,0.764782,0.12167
4,0.37849,0.201825,0.616656
5,0.824638,0.537972,0.930496


In [16]:
type(D)

pandas.core.frame.DataFrame

In [17]:
D.dtypes

0    float64
1    float64
2    float64
dtype: object

## To convert DataFrame to numpy array

In [18]:
#to convert DataFrame to numpy array

D.to_numpy()

array([[0.51914827, 0.58574805, 0.47188385],
       [0.57071666, 0.5018691 , 0.15070633],
       [0.45256903, 0.76478181, 0.12167035],
       [0.37849037, 0.20182498, 0.61665631],
       [0.82463771, 0.53797153, 0.93049603]])

In [19]:
E=D.T  #Transpose
E

Unnamed: 0,1,2,3,4,5
0,0.519148,0.570717,0.452569,0.37849,0.824638
1,0.585748,0.501869,0.764782,0.201825,0.537972
2,0.471884,0.150706,0.12167,0.616656,0.930496


In [20]:
F=D.sort_index(axis=0,ascending=False)
F
#axis 0 is row and axis 1 is column

Unnamed: 0,0,1,2
5,0.824638,0.537972,0.930496
4,0.37849,0.201825,0.616656
3,0.452569,0.764782,0.12167
2,0.570717,0.501869,0.150706
1,0.519148,0.585748,0.471884


In [21]:
G=D.sort_index(axis=1,ascending=False)
G

Unnamed: 0,2,1,0
1,0.471884,0.585748,0.519148
2,0.150706,0.501869,0.570717
3,0.12167,0.764782,0.452569
4,0.616656,0.201825,0.37849
5,0.930496,0.537972,0.824638


In [22]:
type(G[2])

pandas.core.series.Series

In [23]:
type(G)

pandas.core.frame.DataFrame

## Change specific value from dataframe using loc

In [24]:
D

Unnamed: 0,0,1,2
1,0.519148,0.585748,0.471884
2,0.570717,0.501869,0.150706
3,0.452569,0.764782,0.12167
4,0.37849,0.201825,0.616656
5,0.824638,0.537972,0.930496


In [27]:
H=D.copy()
D.loc[1,1]=9876
D

Unnamed: 0,0,1,2
1,0.519148,9876.0,0.471884
2,0.570717,0.501869,0.150706
3,0.452569,0.764782,0.12167
4,0.37849,0.201825,0.616656
5,0.824638,0.537972,0.930496


## Giving Names to columns

In [33]:
D.columns=list(['A','B','C'])
D

Unnamed: 0,A,B,C
1,0.519148,9876.0,0.471884
2,0.570717,0.501869,0.150706
3,0.452569,0.764782,0.12167
4,0.37849,0.201825,0.616656
5,0.824638,0.537972,0.930496


In [37]:
D=D.drop(5,axis=0)  #Dropped 5th row

#inplace=True means droping from original DataFrame

## Specific display of values with one condition

In [40]:
D.loc[(D['B']>0.3)] 

Unnamed: 0,A,B,C
1,0.519148,9876.0,0.471884
2,0.570717,0.501869,0.150706
3,0.452569,0.764782,0.12167


## Specific display of values with many condition

In [41]:
D.loc[(D['B']>0.3) & (D['C']<0.47)] 

Unnamed: 0,A,B,C
2,0.570717,0.501869,0.150706
3,0.452569,0.764782,0.12167


In [46]:
#iloc takes numbers instead of column names
#0,1,2,3....