## Importing libraries

In [1]:
import pandas as pd
import numpy as np

## Creating a Pandas Dataframe

#### (1) Creating a blank dataframe

In [2]:
# Calling the constructor with no arguments gives a frame with no rows and no columns.
df1 = pd.DataFrame()
print(df1)

Empty DataFrame
Columns: []
Index: []


#### (2) Creating a dataframe with columns

In [3]:
# Each dictionary key becomes a column label and its list supplies that column's values,
# one entry per student (7 rows in total).
df1 = pd.DataFrame(
    {
        'Student Name': list('ABCDEFG'),
        'Maths': [92, 58, 75, 89, 63, 75, 86],
        'Science': [89, 67, 85, 49, 76, 83, 87],
        'Literature': [68, 89, 70, 78, 95, 48, 71],
    }
)

## Viewing Dataframe

In [4]:
# head(n) returns the first n rows of the dataframe; n=5 is also what head() uses by default.
print(df1.head(n=5))

  Student Name  Maths  Science  Literature
0            A     92       89          68
1            B     58       67          89
2            C     75       85          70
3            D     89       49          78
4            E     63       76          95


In [5]:
# tail(n) returns the last n rows of the dataframe; n=5 is also what tail() uses by default.
print(df1.tail(n=5))

  Student Name  Maths  Science  Literature
2            C     75       85          70
3            D     89       49          78
4            E     63       76          95
5            F     75       83          48
6            G     86       87          71


#### Printing statistics of the dataframe

In [6]:
# describe() summarises each numeric column: count, mean, std, min, quartiles and max.
# The non-numeric 'Student Name' column does not appear in the summary.
print(df1.describe())

           Maths    Science  Literature
count   7.000000   7.000000    7.000000
mean   76.857143  76.571429   74.142857
std    13.005493  14.304511   15.377782
min    58.000000  49.000000   48.000000
25%    69.000000  71.500000   69.000000
50%    75.000000  83.000000   71.000000
75%    87.500000  86.000000   83.500000
max    92.000000  89.000000   95.000000


## Operations on Dataframe

#### (1) Retrieving the column names 

In [7]:
# df1.columns is the Index object holding the column labels (not a Series);
# converting it to a NumPy array lets it print as a plain array of names.
columns = df1.columns
print('Columns of the dataframe : ', columns.to_numpy())

Columns of the dataframe :  ['Student Name' 'Maths' 'Science' 'Literature']


#### (2) Retrieving a column data from the dataframe

In [8]:
# Selecting a single column label returns that column as a Series;
# it is converted to a NumPy array so only the marks themselves are printed.
print('Marks in Literature : ', df1['Literature'].to_numpy())

Marks in Literature :  [68 89 70 78 95 48 71]


#### (3) Retrieving a row data from the dataframe

In [9]:
# iloc selects by integer position, which is zero-based, so position 2 is the third row.
# The row comes back as a Series and is converted to a NumPy array for printing.
print('Data of third row : ', df1.iloc[2].to_numpy())

Data of third row :  ['C' 75 85 70]


#### (4) Adding a new column to an existing dataframe

In [10]:
# Show the frame as it is before the new column exists.
print('Dataframe before addition')
print(df1.head())

print('Dataframe after addition')
# Assigning a list to a new column label adds that column to df1 in place;
# the list provides one value for each of the rows.
df1['History'] = [85, 96, 45, 63, 72, 81, 89]
print(df1.head())

Dataframe before addition
  Student Name  Maths  Science  Literature
0            A     92       89          68
1            B     58       67          89
2            C     75       85          70
3            D     89       49          78
4            E     63       76          95
Dataframe after addition
  Student Name  Maths  Science  Literature  History
0            A     92       89          68       85
1            B     58       67          89       96
2            C     75       85          70       45
3            D     89       49          78       63
4            E     63       76          95       72


#### (5) Adding a new row to an existing dataframe

In [11]:
# Show the frame as it is before the new row exists.
print('Dataframe before addition')
print(df1.head())

print('Dataframe after addition')
# Assigning to a label that is not yet in the index appends a row. The current row count
# is used as the new label, which is the next free label for the default 0..n-1 index.
# NOTE(review): this modifies df1 in place, so running the cell again appends another row.
# NOTE(review): a student named 'F' already exists in the frame - confirm the repeated name is intended.
df1.loc[df1.shape[0]] = ['F', 59, 76, 55, 91]
# head(10) asks for up to the first 10 rows, so the whole frame is shown here.
print(df1.head(10))

Dataframe before addition
  Student Name  Maths  Science  Literature  History
0            A     92       89          68       85
1            B     58       67          89       96
2            C     75       85          70       45
3            D     89       49          78       63
4            E     63       76          95       72
Dataframe after addition
  Student Name  Maths  Science  Literature  History
0            A     92       89          68       85
1            B     58       67          89       96
2            C     75       85          70       45
3            D     89       49          78       63
4            E     63       76          95       72
5            F     75       83          48       81
6            G     86       87          71       89
7            F     59       76          55       91


## Loading a dataset from different file formats

#### (1) Loading a csv file

In [12]:
# Read the csv file named data.csv into a dataframe.
# NOTE(review): data.csv is written by the df1.to_csv cell further down this notebook,
# so on a fresh run this read only succeeds if the file already exists - confirm the intended order.
# NOTE(review): the output shown below has an 'Unnamed: 0' column, which suggests the file that was
# read had been saved together with its row index - confirm against how data.csv is written.
df=pd.read_csv('data.csv')
# The file is given by name only, so it has to be in the same directory as the notebook.
# Display the first 5 rows of the loaded dataframe.
df.head()

Unnamed: 0,Student Name,Maths,Science,Literature,History
0,A,92,89,68,85
1,B,58,67,89,96
2,C,75,85,70,45
3,D,89,49,78,63
4,E,63,76,95,72


#### (2) Loading an excel file

In [13]:
# Read the excel file named data.xlsx into a dataframe (this replaces the dataframe previously held in df).
# NOTE(review): data.xlsx is written by the df1.to_excel cell further down this notebook,
# so on a fresh run this read only succeeds if the file already exists - confirm the intended order.
# NOTE(review): the output shown below has an 'Unnamed: 0' column, which suggests the file that was
# read had been saved together with its row index - confirm against how data.xlsx is written.
df=pd.read_excel('data.xlsx')
# The file is given by name only, so it has to be in the same directory as the notebook.
# Display the first 5 rows of the loaded dataframe.
df.head()

Unnamed: 0,Student Name,Maths,Science,Literature,History
0,A,92,89,68,85
1,B,58,67,89,96
2,C,75,85,70,45
3,D,89,49,78,63
4,E,63,76,95,72


## Exporting dataframes to different file formats

#### Dataframe to excel

In [14]:
# Write df1 to data.xlsx; index=False leaves the row index out of the written file.
df1.to_excel('data.xlsx',index=False)

#### Dataframe to csv

In [15]:
# Write df1 to data.csv; index=False leaves the row index out of the written file.
df1.to_csv('data.csv',index=False)