# Pandas

Pandas is an open source, BSD-licenced library providing high-performance, easy-to-use data structures and data-analysis tools for the python programming language

In [1]:
# for installing pandas library

!pip install pandas



In [7]:
import pandas as pd

# Series

In [20]:
 # Creating series from tuple
character_series = pd.Series(('l', 'u', 't', 'h', 'o', 'r'))
print('series:', character_series, sep='\n')

print('type:', type(character_series))

series:
0    l
1    u
2    t
3    h
4    o
5    r
dtype: object
type: <class 'pandas.core.series.Series'>


In [19]:
# creating list from a list
integer_series = pd.Series([11, 12, 13, 14, 15, 16])
print('series:', integer_series, sep='\n')
print('type:', type(integer_series))


series:
0    11
1    12
2    13
3    14
4    15
5    16
dtype: int64
type: <class 'pandas.core.series.Series'>


In [27]:
# Creating a date series
# By default - date format is --> mm-dd-yyyy 



date_series = pd.date_range(start = '01-01-2020', end='01-04-2020')
print(date_series, sep= '\n')
print(type(date_series))

DatetimeIndex(['2020-01-01', '2020-01-02', '2020-01-03', '2020-01-04'], dtype='datetime64[ns]', freq='D')
<class 'pandas.core.indexes.datetimes.DatetimeIndex'>


In [40]:
print("year: ", date_series.year)
print("month: ", date_series.month)
print("day: ", date_series.day)
print("month_name: ", date_series.month_name())
print("day_name: ", date_series.day_name())

year:  Int64Index([2020, 2020, 2020, 2020], dtype='int64')
month:  Int64Index([1, 1, 1, 1], dtype='int64')
day:  Int64Index([1, 2, 3, 4], dtype='int64')
month_name:  Index(['January', 'January', 'January', 'January'], dtype='object')
day_name:  Index(['Wednesday', 'Thursday', 'Friday', 'Saturday'], dtype='object')


In [48]:
# Indexing
integer_series = pd.Series([11, 12, 13, 14, 15, 16])
print("The 0th number: ",integer_series[0])
print("The 1st 4 element in the series: ", integer_series[0:4], sep='\n')
print("The 1st and 4th element in the series: ", integer_series[[0, 4]], sep='\n')

The 0th number:  11
The 1st 4 element in the series: 
0    11
1    12
2    13
3    14
dtype: int64
The 1st and 4th element in the series: 
0    11
4    15
dtype: int64


In [53]:
# How do we rename the indexes, we can restructure the index to a value you want
integer_series = pd.Series([11, 12, 13, 14, 15, 16], index = [101, 102, 103, 104, 105, 106])
print("new integer:", integer_series, sep="\n")

print("The 0th element: ", integer_series[101])

new integer:
101    11
102    12
103    13
104    14
105    15
106    16
dtype: int64
The 0th element:  11


In [58]:
# Another way to perform indexing method)

integer_series = pd.Series([11, 12, 13, 14, 15, 16], index=range(101, 107))
print(integer_series)

101    11
102    12
103    13
104    14
105    15
106    16
dtype: int64


# Data Frames

In [70]:
# Data frames are more of like tables
# Creating a dataframe from a dictionary
student_data = {"Names": ["Student_1","Student_2", "Student_3", "Student_4", "Student_5", "Student_6"],"Roll number": [1, 5, 10, 8, 3, 7],
             "Math marks": [88, 89, 90, 78, 66, 58]}
print(pd.DataFrame(student_data))

       Names  Roll number  Math marks
0  Student_1            1          88
1  Student_2            5          89
2  Student_3           10          90
3  Student_4            8          78
4  Student_5            3          66
5  Student_6            7          58


In [71]:
# Or we could do it this way
student_df = pd.DataFrame({"Names": ["Student_1","Student_2", "Student_3", "Student_4", "Student_5", "Student_6"],"Roll number": [1, 5, 10, 8, 3, 7], 
             "Math marks": [88, 89, 90, 78, 66, 58]})
print(student_df)

       Names  Roll number  Math marks
0  Student_1            1          88
1  Student_2            5          89
2  Student_3           10          90
3  Student_4            8          78
4  Student_5            3          66
5  Student_6            7          58


In [74]:
# Reindexing according to row number
roll_number = [1, 5, 10, 8, 3, 7]
pd.DataFrame({"Names": ["Student_1","Student_2", "Student_3", "Student_4", "Student_5", "Student_6"], 
             "Math marks": [88, 89, 90, 78, 66, 58]})

Unnamed: 0,Names,Math marks
0,Student_1,88
1,Student_2,89
2,Student_3,90
3,Student_4,78
4,Student_5,66
5,Student_6,58


In [90]:
# Reading a csv file into a dataframe
# you can use similar functions for the other structured data file+

dummy_df = pd.read_csv("C:Desktop/Book1.csv")
# dummy_df = dummy_df.set_index('id')
dummy_df

Unnamed: 0,First name,last_name,Gender
0,Sikirulahi,Opeyemi,M
1,Tijani,Adk,F
2,Owusu,Samuel,M
3,Onasanwo,Favour,F
4,Dipeolu,Ayomide,M


In [2]:
import pandas as pd

In [29]:
dummy_df = pd.read_csv("C:Desktop/Book1.csv")
dummy_df = dummy_df.set_index('id')
print(dummy_df)

   first_name  last_name       Gender
id                                   
1        Dodi   McCurlay         Male
2        Poch     Casado  Genderqueer
3       Knspn  Goviniock      Agender
4     Tighany     Dabney     Bigender
5       Derry     Fehner  Genderfluid
6    Herberto  Behninck      Bigender
7      Michal       Gath       Female
8      Stella   Shadwick  Genderfluid
9    Consuelo       Asty   Polygender
10     Amabel  Moortimer         Male


In [26]:
# Just view the first few records
dummy_df.head(6)

Unnamed: 0_level_0,first_name,last_name,Gender
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
1,Dodi,McCurlay,Male
2,Poch,Casado,Genderqueer
3,Knspn,Goviniock,Agender
4,Tighany,Dabney,Bigender
5,Derry,Fehner,Genderfluid
6,Herberto,Behninck,Bigender


In [31]:
dummy_df.head(2)

Unnamed: 0_level_0,first_name,last_name,Gender
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
1,Dodi,McCurlay,Male
2,Poch,Casado,Genderqueer


In [33]:
# Just view the last few records
dummy_df.tail(6)

Unnamed: 0_level_0,first_name,last_name,Gender
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
5,Derry,Fehner,Genderfluid
6,Herberto,Behninck,Bigender
7,Michal,Gath,Female
8,Stella,Shadwick,Genderfluid
9,Consuelo,Asty,Polygender
10,Amabel,Moortimer,Male


In [35]:
# Information about the data
dummy_df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 10 entries, 1 to 10
Data columns (total 3 columns):
 #   Column      Non-Null Count  Dtype 
---  ------      --------------  ----- 
 0   first_name  10 non-null     object
 1   last_name   10 non-null     object
 2   Gender      10 non-null     object
dtypes: object(3)
memory usage: 320.0+ bytes


In [37]:
# Shape of the data
dummy_df.shape

(10, 3)

In [40]:
dummy_df.describe()

Unnamed: 0,first_name,last_name,Gender
count,10,10,10
unique,10,10,7
top,Dodi,McCurlay,Male
freq,1,1,2


In [45]:
# Names of columns present
dummy_df.columns

Index(['first_name', 'last_name', 'Gender'], dtype='object')

In [46]:
type(dummy_df.columns)

pandas.core.indexes.base.Index

In [47]:
list(dummy_df.columns)

['first_name', 'last_name', 'Gender']

In [55]:
# To print the array of the rows
print("All rows: ", dummy_df.values, sep='\n', end='\n\n')

# You can also index the values
print("row in the zeroth index: ", dummy_df.values[0])

# To give the 0th value
print("zeroth value: ", dummy_df.values[0, 0])

All rows: 
[['Dodi' 'McCurlay' 'Male']
 ['Poch' 'Casado' 'Genderqueer']
 ['Knspn' 'Goviniock' 'Agender']
 ['Tighany' 'Dabney' 'Bigender']
 ['Derry' 'Fehner' 'Genderfluid']
 ['Herberto' 'Behninck ' 'Bigender']
 ['Michal' 'Gath' 'Female']
 ['Stella' 'Shadwick' 'Genderfluid']
 ['Consuelo' 'Asty' 'Polygender']
 ['Amabel' 'Moortimer' 'Male']]

row in the zeroth index:  ['Dodi' 'McCurlay' 'Male']
zeroth value:  Dodi


In [57]:
# To count the number or non-null values in each row or column
dummy_df.count()

first_name    10
last_name     10
Gender        10
dtype: int64

In [61]:
# Gives you the unique count of all the unique value present in the column
dummy_df['Gender'].value_counts()

Male           2
Bigender       2
Genderfluid    2
Genderqueer    1
Agender        1
Female         1
Polygender     1
Name: Gender, dtype: int64

In [64]:
dummy_df

Unnamed: 0_level_0,first_name,last_name,Gender
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
1,Dodi,McCurlay,Male
2,Poch,Casado,Genderqueer
3,Knspn,Goviniock,Agender
4,Tighany,Dabney,Bigender
5,Derry,Fehner,Genderfluid
6,Herberto,Behninck,Bigender
7,Michal,Gath,Female
8,Stella,Shadwick,Genderfluid
9,Consuelo,Asty,Polygender
10,Amabel,Moortimer,Male


In [63]:
# For accessing the rows in a table
dummy_df[0:1]

Unnamed: 0_level_0,first_name,last_name,Gender
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
1,Dodi,McCurlay,Male


In [72]:
# The number in the column denotes the index number
dummy_df[1:3]

Unnamed: 0_level_0,first_name,last_name,Gender
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2,Poch,Casado,Genderqueer
3,Knspn,Goviniock,Agender


In [73]:
dummy_df[:5]

Unnamed: 0_level_0,first_name,last_name,Gender
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
1,Dodi,McCurlay,Male
2,Poch,Casado,Genderqueer
3,Knspn,Goviniock,Agender
4,Tighany,Dabney,Bigender
5,Derry,Fehner,Genderfluid


In [93]:
# It works just like the indexing you know
# It prints fro, index 1 to 8 with a step of 2
dummy_df[1:8:2]


Unnamed: 0_level_0,first_name,last_name,Gender
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2,Poch,Casado,Genderqueer
4,Tighany,Dabney,Bigender
6,Herberto,Behninck,Bigender
8,Stella,Shadwick,Genderfluid


In [79]:
# Selecting columns
# First method (Prefered)
print("Series output: ",dummy_df['Gender'],sep='\n')
print(type(dummy_df['Gender']))

Series output: 
id
1            Male
2     Genderqueer
3         Agender
4        Bigender
5     Genderfluid
6        Bigender
7          Female
8     Genderfluid
9      Polygender
10           Male
Name: Gender, dtype: object
<class 'pandas.core.series.Series'>


In [81]:
# Second method
print("Series Output: ",dummy_df.Gender, sep='\n')
print("Type: ", type(dummy_df.Gender))

Series Output: 
id
1            Male
2     Genderqueer
3         Agender
4        Bigender
5     Genderfluid
6        Bigender
7          Female
8     Genderfluid
9      Polygender
10           Male
Name: Gender, dtype: object
Type:  <class 'pandas.core.series.Series'>


In [90]:
print("Data Frame Output: ",dummy_df[['Gender','first_name']], sep='\n')
print()
print("Type: ", type(dummy_df))

Data Frame Output: 
         Gender first_name
id                        
1          Male       Dodi
2   Genderqueer       Poch
3       Agender      Knspn
4      Bigender    Tighany
5   Genderfluid      Derry
6      Bigender   Herberto
7        Female     Michal
8   Genderfluid     Stella
9    Polygender   Consuelo
10         Male     Amabel

Type:  <class 'pandas.core.frame.DataFrame'>


In [None]:

9