# Pandas 

__Import Pandas__

In [None]:
import pandas as pd

In [None]:
# Sample employee records, one list per column.
data = dict(
    Name=['Bill', 'Dan', 'Tony', 'Mark'],
    Age=[28, 29, 31, 27],
    Salary=[2000, 2500, 2100, 2200],
)

# Each dict key becomes a column; rows get the default integer index.
obj = pd.DataFrame(data)
obj

In [None]:
# Select a single column by its name; the result is a Series
obj['Age']

In [None]:
# Show the column labels of the DataFrame
obj.columns

In [None]:
# Return the DataFrame's data as a 2-D NumPy array (row and column labels are not included)
obj.values

In [None]:
# Drop the row whose index label is 1.
# drop() returns a new DataFrame; obj itself is left unchanged.
obj.drop(1)

In [None]:
# Drop the 'Age' column (axis=1 means the label refers to a column, not a row).
# drop() returns a new DataFrame; obj itself is left unchanged.
obj.drop('Age',axis=1)

__Indexing, Selection, and Filtering__

In [None]:
# Build the DataFrame again, this time giving each row an explicit label.
# Note: `data` is rebound here from the plain dict to the resulting DataFrame.
data = pd.DataFrame(data, 
    index=['emp1','emp2','emp3','emp4'])
data

In [None]:
# Indexing format
# data.loc[rows, columns]    -> select by label
# data.iloc[rows, columns]   -> select by integer position


1. Column selection

In [None]:
# Select several columns by name; the result follows the order given in the list
data[['Age','Name']]

In [None]:
# Same selection with .loc: ':' keeps every row, the list picks columns by label
data.loc[:,['Age','Name']]

In [None]:
# If we don't know the column names, select columns by integer position instead
# (here the columns at positions 0 and 1, for every row)
data.iloc[:,[0,1]]

2. Row Selection

In [None]:
# If we know the index labels, slice rows by label with .loc.
# A label slice includes both endpoints, so this returns rows emp1, emp2 and emp3.
data.loc['emp1':'emp3',:]


In [None]:
# If we don't know the index labels, slice rows by integer position with .iloc.
# A position slice excludes the stop value, so 0:2 returns the rows at positions 0 and 1.
data.iloc[0:2,:]

3. Mixed selection

In [None]:
# Rows emp1 through emp3 (label slice, end included), restricted to the Name and Age columns
data.loc['emp1':'emp3',['Name','Age']]

In [None]:
# Select the rows at positions 0, 2 and 3 and the columns at positions 0 and 2
data.iloc[[0,2,3],[0,2]]

__Filtering Data__

In [None]:
# Show the Name and Salary of those employees
# whose age is greater than 28.
# `data.Age > 28` builds a boolean mask that .loc uses to pick the rows.
data.loc[data.Age > 28,['Name','Salary']]

__DESCRIPTIVE STATISTICS__

In [None]:
# Show summary statistics (count, mean, std, min, quartiles, max) for the numeric columns
data.describe()

In [None]:
# Show the minimum age among the employees:
# take the whole Age column, then reduce it with min()
data.loc[:,'Age'].min()


__HANDLING MISSING DATA__

In [None]:
# Small numeric frame with missing values (NaN) for the examples that follow.
# No labels are passed, so rows and columns get default integer labels.
data = pd.DataFrame(
    [
        [2.3, 3.3, float('nan')],
        [7.5, float('nan'), 9.8],
        [float('nan'), 2.2, 6.8],
        [5.6, 9.2, 7.4],  # the only row without a missing value
        [float('nan'), float('nan'), float('nan')],  # entirely missing
    ]
)
data

1. Filtering missing data

In [None]:
# Drop every row that contains at least one null value.
# dropna() returns a new DataFrame; data itself is left unchanged.
data.dropna()

In [None]:
# Drop only the rows in which every value is null;
# rows that still hold at least one value are kept.
data.dropna(how='all')


2. Filling missing data


In [None]:
# Replace every null value with 0 (returns a new DataFrame)
data.fillna(0)

In [None]:
# Replace the null values in each column with that column's mean.
# data.mean() computes one mean per column, ignoring nulls.
data.fillna(data.mean())

In [None]:
# Fill null values with a different value for each column.
# The dict keys are column labels (0, 1, 2 here); the dict values are the replacements.
data.fillna({0:2.5, 1:3.0, 2:5.5})

__READING AND WRITING FILES__

In [None]:
# Read the CSV file 'data.csv' into a DataFrame.
# The path is relative, so the file is looked up in the current working directory.
# Note: this rebinds `data`, replacing the frame used in the previous section.
data = pd.read_csv('data.csv')

In [None]:
# Show the first rows of the DataFrame (head() returns 5 rows by default)
data.head()

File Type     Reader

* CSV:            read_csv
* JSON:           read_json 
* MS Excel:       read_excel
* SQL:            read_sql
* HTML:           read_html


In [None]:
# Save the DataFrame to a CSV file.
# `data` is already a DataFrame (it was returned by pd.read_csv above), so it
# can be written directly without wrapping it in pd.DataFrame() again.
# By default to_csv also writes the row index as the first column;
# pass index=False to leave it out.
data.to_csv('myfile.csv')