# Creating Series and DataFrame

### Creating a Series

In [190]:

import pandas as pd

import warnings
# NOTE(review): this silences every warning for the rest of the session,
# including pandas deprecation notices; consider narrowing the filter.
warnings.filterwarnings('ignore')
# Show every column when a DataFrame is displayed (None removes the column
# limit). This does not change how many rows are shown.
pd.set_option("display.max_columns", None)
# Let the text representation use up to 1000 characters per line before wrapping.
pd.set_option('display.width', 1000)


In [191]:
# Build a pandas Series from a plain Python list of the integers 1..10.
data = list(range(1, 11))
series = pd.Series(data)

# .head(n) returns the first n rows; n=10 covers the whole Series here.
# (type(series) can be used to confirm the object is a Series.)
series.head(10)



0     1
1     2
2     3
3     4
4     5
5     6
6     7
7     8
8     9
9    10
dtype: int64

In [192]:
# Build a Series with custom index labels ("a".."j") instead of the default 0..9.
# list("abcdefghij") expands the string into one label per character.
series2 = pd.Series(data, index=list("abcdefghij"))
series2.head(10)

a     1
b     2
c     3
d     4
e     5
f     6
g     7
h     8
i     9
j    10
dtype: int64

In [193]:
# Build a Series from a dictionary: the keys become the index labels
# and the values become the data.
data2 = dict(a=10, b=20, c=30)
series3 = pd.Series(data2)
series3.head()

a    10
b    20
c    30
dtype: int64

#### Hands-on practice

In [194]:
# Question 1
# A Series of 6 items labelled with the letters "a".."f".
bucket = [
    "fan",
    "water",
    "bottle",
    "purse",
    "earbuds",
    "phone",
]
series_bucket = pd.Series(bucket, index=list("abcdef"))

# head() defaults to the first 5 rows, so the sixth item ("phone") is not displayed.
series_bucket.head()

a        fan
b      water
c     bottle
d      purse
e    earbuds
dtype: object

In [195]:
# Question 2
# A Series built from a dictionary of mixed value types (strings and integers);
# the field names become the index labels.
biodata = {
    "Name": "Esther",
    "Age": 23,
    "Track": "AI_Eng",
    "State": "Ogun",
    "Height": 155,
}
series_biodata = pd.Series(biodata)
series_biodata.head()

Name      Esther
Age           23
Track     AI_Eng
State       Ogun
Height       155
dtype: object

### Creating a DataFrame

In [196]:
# Build a DataFrame from a dictionary of lists:
# each key is a column name and each list holds that column's values.
data = dict(
    Name=["Chris", "Ayo", "Chisom"],
    Age=[26, 24, 22],
    Home_Town=["Benin", "Ibadan", "Enugu"],
)
df = pd.DataFrame(data)
df.head()

Unnamed: 0,Name,Age,Home_Town
0,Chris,26,Benin
1,Ayo,24,Ibadan
2,Chisom,22,Enugu


In [197]:
# Build a DataFrame from a list of dictionaries:
# each dictionary is one row, keyed by column name.
data2 = [
    dict(Name="Chris", Age=26, Home_Town="Benin"),
    dict(Name="Ayo", Age=24, Home_Town="Ibadan"),
    dict(Name="Chisom", Age=22, Home_Town="Enugu"),
]
df2 = pd.DataFrame(data2)
df2.head()

Unnamed: 0,Name,Age,Home_Town
0,Chris,26,Benin
1,Ayo,24,Ibadan
2,Chisom,22,Enugu


In [198]:
# Build a DataFrame from a list of lists: each inner list is one row.
# The rows carry no labels, so the column names are supplied separately.
data3 = [["Chris", 26, "Benin"], ["Ayo", 24, "Ibadan"], ["Chisom", 22, "Enugu"]]
df3 = pd.DataFrame(
    data=data3,
    columns=["Name", "Age", "Home_Town"],
)
df3.head()

Unnamed: 0,Name,Age,Home_Town
0,Chris,26,Benin
1,Ayo,24,Ibadan
2,Chisom,22,Enugu


In [199]:
# Confirm that all three construction routes produce the same kind of object:
# print the type of each frame on its own line.
print(*map(type, (df, df2, df3)), sep="\n")

<class 'pandas.core.frame.DataFrame'>
<class 'pandas.core.frame.DataFrame'>
<class 'pandas.core.frame.DataFrame'>


#### Hands-on practice

In [200]:
# Read biodata.csv (expected in the working directory) into a DataFrame.
df = pd.read_csv("biodata.csv")

# Only the LAST bare expression of a cell is rendered automatically, so each
# intermediate result is passed to display() (made available by the IPython
# kernel without an import) to actually show it.
display(df.head())      # first five rows
display(df.tail())      # last five rows
df.info()               # prints column names, non-null counts and dtypes itself
display(df.describe())  # statistical summary of the data
display(df.shape)       # (number of rows, number of columns)
df.columns              # column names; rendered as the cell's result



<class 'pandas.core.frame.DataFrame'>
RangeIndex: 34 entries, 0 to 33
Data columns (total 10 columns):
 #   Column        Non-Null Count  Dtype 
---  ------        --------------  ----- 
 0   Timestamp     34 non-null     object
 1   First Name    34 non-null     object
 2   Last Name     34 non-null     object
 3   Course Track  34 non-null     object
 4   City          34 non-null     object
 5   Gender        34 non-null     object
 6   Seat Number   34 non-null     int64 
 7   PC-Make       34 non-null     object
 8   PC - OS       34 non-null     object
 9   Feedback      34 non-null     object
dtypes: int64(1), object(9)
memory usage: 2.8+ KB


Index(['Timestamp', 'First Name', 'Last Name', 'Course Track', 'City', 'Gender', 'Seat Number', 'PC-Make', 'PC - OS', 'Feedback'], dtype='object')

#### Data Cleaning

In [201]:
# DATA CLEANING
# Handling missing values: count the missing entries in each column.
# df.isnull() is an alias of df.isna(), so calling both repeats the same work
# (and only the last expression of a cell is displayed anyway).
df.isna().sum()
# If any count were non-zero, df.fillna(0) would return a copy with the gaps
# replaced by 0; with no missing cells it would change nothing.

Timestamp       0
First Name      0
Last Name       0
Course Track    0
City            0
Gender          0
Seat Number     0
PC-Make         0
PC - OS         0
Feedback        0
dtype: int64

In [212]:
# Finding and handling duplicates
# Flag each row that repeats an earlier row: all columns are compared
# (subset=None) and the first occurrence is not flagged (keep="first").
# df.drop_duplicates() would return a copy without the flagged rows.
df.duplicated(subset=None, keep="first")

0     False
1     False
2     False
3     False
4     False
5     False
6     False
7     False
8     False
9     False
10    False
11    False
12    False
13    False
14    False
15    False
16    False
17    False
18    False
19    False
20    False
21    False
22    False
23    False
24    False
25    False
26    False
27    False
28    False
29    False
30    False
31    False
32    False
33    False
dtype: bool

In [None]:
# Correcting data types
# astype() and pd.to_datetime() return NEW objects and leave df untouched, so
# the converted columns are assigned back; otherwise the conversion is lost.
# assign() builds a new frame, which keeps this cell safe to re-run.
df = df.assign(**{
    # cast the seat numbers to integers
    "Seat Number": df["Seat Number"].astype(int),
    # parse the timestamp strings into datetime values
    "Timestamp": pd.to_datetime(df["Timestamp"]),
})

# Confirm the resulting dtype of every column (display() is provided by the
# IPython kernel), then show the converted timestamps as the cell's result.
display(df.dtypes)
df["Timestamp"]

0    2025-09-11 12:55:34-01:00
1    2025-09-11 12:56:11-01:00
2    2025-09-11 12:57:08-01:00
3    2025-09-11 12:57:56-01:00
4    2025-09-11 12:58:41-01:00
5    2025-09-11 12:58:55-01:00
6    2025-09-11 12:58:55-01:00
7    2025-09-11 12:59:00-01:00
8    2025-09-11 12:59:16-01:00
9    2025-09-11 12:59:28-01:00
10   2025-09-11 12:59:41-01:00
11   2025-09-11 12:59:43-01:00
12   2025-09-11 13:00:03-01:00
13   2025-09-11 13:00:13-01:00
14   2025-09-11 13:00:27-01:00
15   2025-09-11 13:00:31-01:00
16   2025-09-11 13:00:43-01:00
17   2025-09-11 13:00:49-01:00
18   2025-09-11 13:00:54-01:00
19   2025-09-11 13:01:31-01:00
20   2025-09-11 13:01:44-01:00
21   2025-09-11 13:01:46-01:00
22   2025-09-11 13:01:54-01:00
23   2025-09-11 13:02:28-01:00
24   2025-09-11 13:02:49-01:00
25   2025-09-11 13:02:56-01:00
26   2025-09-11 13:02:59-01:00
27   2025-09-11 13:03:12-01:00
28   2025-09-11 13:03:36-01:00
29   2025-09-11 13:06:03-01:00
30   2025-09-11 13:06:48-01:00
31   2025-09-11 13:10:16-01:00
32   202

#### Data Selection and Filtering

In [None]:
# Column viewing
# Display the column labels of df (an Index object), i.e. the names that can
# be used to select columns.
df.columns

Index(['Timestamp', 'First Name', 'Last Name', 'Course Track', 'City', 'Gender', 'Seat Number', 'PC-Make', 'PC - OS', 'Feedback'], dtype='object')

In [217]:
# Column selection
# Looking through a single column with bracket notation, which works for any
# column label, including ones containing spaces. Only the last expression of
# a cell is rendered, so this result is shown explicitly with display()
# (provided by the IPython kernel); head() keeps the preview short.
display(df["First Name"].head())

# Alternatively, dot (attribute) access can be used. It only works when the
# label is a valid Python identifier, so it cannot reach "First Name".
df.Gender

0       Male
1     Female
2     Female
3       Male
4       Male
5       Male
6     Female
7     Female
8     Female
9       Male
10    Female
11    Female
12      Male
13      Male
14      Male
15      Male
16      Male
17      Male
18      Male
19      Male
20      Male
21      Male
22      Male
23      Male
24      Male
25      Male
26      Male
27    Female
28      Male
29      Male
30      Male
31      Male
32      Male
33      Male
Name: Gender, dtype: object

In [220]:
# Selecting multiple columns: keep every row (":") and only the listed
# columns, in the order given.
df.loc[:, ["First Name", "Last Name", "Feedback"]]

Unnamed: 0,First Name,Last Name,Feedback
0,Peter,Okonmah,non
1,Toyeebat,Nababa,Excellent
2,Perpetual,Meninwa,Thank you so much for the opportunity.
3,Mahfuz,Abdulhameed,Amazing Shit
4,Divine,Gbadamosi,Brain Racking
5,Abdulmalik,Adedotun,Enjoying the course so far
6,Naheemot,Adebiyi,Grateful for the opportunity to be here.
7,Kanyisola,Fagbayi,One chin chin for you for this form
8,Blessing,James,Thanks for creating the form.
9,Hannah,Tanimola,On God


In [None]:
# Selecting a single cell
# Only the last expression of a cell is rendered, so the earlier lookups are
# shown with display() (provided by the IPython kernel).

# First name in the row labelled 2. .loc[row_label, column_label] does this in
# one step instead of the chained df["First Name"][2], which selects the
# column first and then indexes into it.
display(df.loc[2, "First Name"])

# Other methods
# .at is the label-based accessor for one value: row label 0, column "First Name".
display(df.at[0, "First Name"])

# .iat is the position-based accessor: first row, second column.
df.iat[0, 1]

'Peter'