# pandas Basics

In [1]:
import numpy as np
import pandas as pd

### Creating a Series

A series contains one dimension, like a column.

In [2]:
a_list = [3, 217, 182]
a_list

[3, 217, 182]

In [3]:
a_index = ['a','b','c']

In [4]:
a = pd.Series(a_list, index=a_index)
a

a      3
b    217
c    182
dtype: int64

In [5]:
a_no_index = pd.Series(a_list)
a_no_index

0      3
1    217
2    182
dtype: int64

In [6]:
b_dict = {'cat': 'Norma', 'dog': 'Fozzie', 'fish': 'Al'}
b_dict

{'cat': 'Norma', 'dog': 'Fozzie', 'fish': 'Al'}

In [7]:
b = pd.Series(b_dict)
b

cat      Norma
dog     Fozzie
fish        Al
dtype: object

### Creating a DataFrame
A DataFrame contains two dimensions (rows and columns) and displays as a table.

In [8]:
data = {
    "product_num": [1658, 2476, 3911],
    "quantity": [120, 54, 98]
}

In [9]:
df = pd.DataFrame(data)
print(df)

   product_num  quantity
0         1658       120
1         2476        54
2         3911        98


In [10]:
df

Unnamed: 0,product_num,quantity
0,1658,120
1,2476,54
2,3911,98


In [11]:
# You can also export a DataFrame to a string

df.to_string()

'   product_num  quantity\n0         1658       120\n1         2476        54\n2         3911        98'

In [12]:
df01 = pd.DataFrame((1,2,3,4)) 
df01

Unnamed: 0,0
0,1
1,2
2,3
3,4


In [13]:
arr = np.array([1,2,3,4]) 
df02 = pd.DataFrame(arr) 
df02

Unnamed: 0,0
0,1
1,2
2,3
3,4


In [14]:
ser = pd.Series(arr) 
df03 = pd. DataFrame(ser)
df03

Unnamed: 0,0
0,1
1,2
2,3
3,4


In [15]:
arr2 = np.array([[1,2,3,4]]) 
df04 = pd.DataFrame(arr2)
df04

Unnamed: 0,0,1,2,3
0,1,2,3,4


In [16]:
df04.insert(2, 'A', ['a'])
df04.insert(5, 'D', ['d'])
df04

Unnamed: 0,0,1,A,2,3,D
0,1,2,a,3,4,d


In [17]:
# NumPy Sampling -- using .arange() and .reshape() to populate a DataFrame

sampledata = pd.DataFrame(np.arange(8).reshape((2,4)), index=['apple','banana'], columns=['a','b','c','d'])
sampledata

Unnamed: 0,a,b,c,d
apple,0,1,2,3
banana,4,5,6,7


In [18]:
sampledata['banana':]

Unnamed: 0,a,b,c,d
banana,4,5,6,7


### Using .loc[] and .iloc[]

The .loc[] attribute works with the named index of a DataFrame. The .iloc[] attribute uses the underlying numbered index.

In [19]:
data = { "product_num": [1658, 2476, 3911], "quantity": [120, 54, 98] } 
inventory = pd.DataFrame(data)
inventory

Unnamed: 0,product_num,quantity
0,1658,120
1,2476,54
2,3911,98


In [20]:
print(type(inventory.loc[1]))
inventory.loc[1]

<class 'pandas.core.series.Series'>


product_num    2476
quantity         54
Name: 1, dtype: int64

In [21]:
print(type(inventory.loc[1:2]))
inventory.loc[1:2]

<class 'pandas.core.frame.DataFrame'>


Unnamed: 0,product_num,quantity
1,2476,54
2,3911,98


In [22]:
sampledata

Unnamed: 0,a,b,c,d
apple,0,1,2,3
banana,4,5,6,7


In [23]:
sampledata.loc['orange'] = [11, 12, 13, 14]
sampledata

Unnamed: 0,a,b,c,d
apple,0,1,2,3
banana,4,5,6,7
orange,11,12,13,14


In [24]:
sampledata.iloc[2] = [21, 31, 41, 51]
sampledata

Unnamed: 0,a,b,c,d
apple,0,1,2,3
banana,4,5,6,7
orange,21,31,41,51


In [25]:
sampledata.loc['banana':'orange']

Unnamed: 0,a,b,c,d
banana,4,5,6,7
orange,21,31,41,51


In [26]:
sampledata.iloc[1:]

Unnamed: 0,a,b,c,d
banana,4,5,6,7
orange,21,31,41,51


### Reading Files into a DataFrame

In [27]:
# available file formats for import include .txt, .csv, .json, .xlsx

df_country = pd.read_json('country_data.json')

df_country.tail(10)

Unnamed: 0,Name,Continent,Region,SurfaceArea,IndepYear,Population,LifeExpectancy,GNP,LocalName,Language
974,Zambia,Africa,Eastern Africa,752618.0,1964.0,9169000,37.2,3377.0,Zambia,Bemba
975,Zambia,Africa,Eastern Africa,752618.0,1964.0,9169000,37.2,3377.0,Zambia,Chewa
976,Zambia,Africa,Eastern Africa,752618.0,1964.0,9169000,37.2,3377.0,Zambia,Lozi
977,Zambia,Africa,Eastern Africa,752618.0,1964.0,9169000,37.2,3377.0,Zambia,Nsenga
978,Zambia,Africa,Eastern Africa,752618.0,1964.0,9169000,37.2,3377.0,Zambia,Nyanja
979,Zambia,Africa,Eastern Africa,752618.0,1964.0,9169000,37.2,3377.0,Zambia,Tongan
980,Zimbabwe,Africa,Eastern Africa,390757.0,1980.0,11669000,37.8,5951.0,Zimbabwe,English
981,Zimbabwe,Africa,Eastern Africa,390757.0,1980.0,11669000,37.8,5951.0,Zimbabwe,Ndebele
982,Zimbabwe,Africa,Eastern Africa,390757.0,1980.0,11669000,37.8,5951.0,Zimbabwe,Nyanja
983,Zimbabwe,Africa,Eastern Africa,390757.0,1980.0,11669000,37.8,5951.0,Zimbabwe,Shona


In [28]:
df_country.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 984 entries, 0 to 983
Data columns (total 10 columns):
 #   Column          Non-Null Count  Dtype  
---  ------          --------------  -----  
 0   Name            984 non-null    object 
 1   Continent       984 non-null    object 
 2   Region          984 non-null    object 
 3   SurfaceArea     984 non-null    float64
 4   IndepYear       892 non-null    float64
 5   Population      984 non-null    int64  
 6   LifeExpectancy  967 non-null    float64
 7   GNP             984 non-null    float64
 8   LocalName       984 non-null    object 
 9   Language        984 non-null    object 
dtypes: float64(4), int64(1), object(5)
memory usage: 77.0+ KB


In [29]:
df_country.describe()

Unnamed: 0,SurfaceArea,IndepYear,Population,LifeExpectancy,GNP
count,984.0,892.0,984.0,967.0,984.0
mean,1104603.0,1809.380045,54355880.0,64.336298,233929.5
std,2690225.0,546.268082,180128300.0,12.241236,1002474.0
min,0.4,-1523.0,0.0,37.2,0.0
25%,30518.0,1905.0,2024000.0,54.15,1706.0
50%,237595.5,1958.0,8678200.0,68.0,8005.0
75%,801590.0,1971.0,31147000.0,74.5,73692.0
max,17075400.0,1994.0,1277558000.0,83.5,8510700.0


In [30]:
df_export = (df_country[0:100])
df_export.tail(5)

Unnamed: 0,Name,Continent,Region,SurfaceArea,IndepYear,Population,LifeExpectancy,GNP,LocalName,Language
95,Bahrain,Asia,Middle East,694.0,1971.0,617000,73.0,6366.0,Al-Bahrayn,English
96,Bahamas,North America,Caribbean,13878.0,1973.0,307000,71.1,3527.0,The Bahamas,Creole English
97,Bahamas,North America,Caribbean,13878.0,1973.0,307000,71.1,3527.0,The Bahamas,Creole French
98,Bosnia and Herzegovina,Europe,Southern Europe,51197.0,1992.0,3972000,71.5,2841.0,Bosna i Hercegovina,Serbo-Croatian
99,Belarus,Europe,Eastern Europe,207600.0,1991.0,10236000,68.0,13714.0,Belarus,Belorussian


In [31]:
df_export.to_csv('country_data_export.csv', index=False)

### Side note: Max Rows
Settings in pandas default to displaying a maximum of 60 rows for a DataFrame. You can check your current settings with the statement below:

In [32]:
print(pd.options.display.max_rows)

60


If you want to chance your max rows setting, you can use the same statement and assign a new row value:

#
# Cleaning Data with Pandas

In [33]:
df_s = pd.read_csv('survey_data.csv')
df_s

Unnamed: 0,ID,Timestamp,Age,Country,Postal Code,Min Sib,Max Sib,TV_Amount,Muppet,Ghost_Belief,Ghost_Encounter,Rock_Paper_Scissors
0,1,6/21/2024 12:50,42,United States,60189,1,2.0,3-6 hours per week,Rizzo the Rat,Maybe,Maybe,Paper
1,2,6/21/2024 13:23,33,USA,59086,5,,6-8 hours per week,Gonzo the Great,Maybe,No,Scissors
2,3,6/21/2024 13:35,52,USA,97219,0,0.0,1-3 hours per week,Beaker. A thousand times Beaker.,Maybe,Maybe,Rock
3,4,6/21/2024 13:40,65,United States of America,55118,5,,More than 8 hours per week,Fozzie Bear,Yes,Yes,Paper
4,5,6/21/2024 13:50,51,United States,55116,1,2.0,More than 8 hours per week,Animal,Yes,Maybe,Rock
...,...,...,...,...,...,...,...,...,...,...,...,...
120,121,9/23/2024 11:36,21,United States,60104,3,4.0,3-6 hours per week,Miss Piggy,Yes,Yes,Scissors
121,122,9/23/2024 11:36,19,United States,60616,5,,6-8 hours per week,Beaker,Yes,Maybe,Rock
122,123,9/23/2024 11:36,30,United States,60805,5,,6-8 hours per week,Kermit the Frog,Yes,No,Scissors
123,124,9/23/2024 11:36,25,United States of America,60473,3,4.0,More than 8 hours per week,Cooke Monster,Maybe,No,Scissors


In [34]:
df_s.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 125 entries, 0 to 124
Data columns (total 12 columns):
 #   Column               Non-Null Count  Dtype  
---  ------               --------------  -----  
 0   ID                   125 non-null    int64  
 1   Timestamp            125 non-null    object 
 2   Age                  125 non-null    int64  
 3   Country              125 non-null    object 
 4   Postal Code          124 non-null    object 
 5   Min Sib              125 non-null    int64  
 6   Max Sib              117 non-null    float64
 7   TV_Amount            125 non-null    object 
 8   Muppet               125 non-null    object 
 9   Ghost_Belief         124 non-null    object 
 10  Ghost_Encounter      125 non-null    object 
 11  Rock_Paper_Scissors  125 non-null    object 
dtypes: float64(1), int64(3), object(8)
memory usage: 11.8+ KB


In [35]:
df_s2 = df_s.dropna()
df_s2

Unnamed: 0,ID,Timestamp,Age,Country,Postal Code,Min Sib,Max Sib,TV_Amount,Muppet,Ghost_Belief,Ghost_Encounter,Rock_Paper_Scissors
0,1,6/21/2024 12:50,42,United States,60189,1,2.0,3-6 hours per week,Rizzo the Rat,Maybe,Maybe,Paper
2,3,6/21/2024 13:35,52,USA,97219,0,0.0,1-3 hours per week,Beaker. A thousand times Beaker.,Maybe,Maybe,Rock
4,5,6/21/2024 13:50,51,United States,55116,1,2.0,More than 8 hours per week,Animal,Yes,Maybe,Rock
5,6,6/21/2024 14:01,28,USA,43207,1,2.0,3-6 hours per week,Rizzo the Rat,Maybe,Maybe,Rock
6,7,6/21/2024 14:02,40,Usa,53704,1,2.0,Less than 1 hour per week,I do not have a favorite Muppet,No,No,Rock
...,...,...,...,...,...,...,...,...,...,...,...,...
118,119,9/23/2024 11:36,28,United States,60626,3,4.0,6-8 hours per week,Kermit the Frog,Yes,Maybe,Paper
119,120,9/23/2024 11:36,29,United States,60632,1,2.0,More than 8 hours per week,I do not have a favorite Muppet,Yes,Maybe,Paper
120,121,9/23/2024 11:36,21,United States,60104,3,4.0,3-6 hours per week,Miss Piggy,Yes,Yes,Scissors
123,124,9/23/2024 11:36,25,United States of America,60473,3,4.0,More than 8 hours per week,Cooke Monster,Maybe,No,Scissors


In [36]:
df_s2.drop([119,120], axis=0, inplace=True)
df_s2

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_s2.drop([119,120], axis=0, inplace=True)


Unnamed: 0,ID,Timestamp,Age,Country,Postal Code,Min Sib,Max Sib,TV_Amount,Muppet,Ghost_Belief,Ghost_Encounter,Rock_Paper_Scissors
0,1,6/21/2024 12:50,42,United States,60189,1,2.0,3-6 hours per week,Rizzo the Rat,Maybe,Maybe,Paper
2,3,6/21/2024 13:35,52,USA,97219,0,0.0,1-3 hours per week,Beaker. A thousand times Beaker.,Maybe,Maybe,Rock
4,5,6/21/2024 13:50,51,United States,55116,1,2.0,More than 8 hours per week,Animal,Yes,Maybe,Rock
5,6,6/21/2024 14:01,28,USA,43207,1,2.0,3-6 hours per week,Rizzo the Rat,Maybe,Maybe,Rock
6,7,6/21/2024 14:02,40,Usa,53704,1,2.0,Less than 1 hour per week,I do not have a favorite Muppet,No,No,Rock
...,...,...,...,...,...,...,...,...,...,...,...,...
116,117,9/23/2024 11:35,24,United States,60647,3,4.0,More than 8 hours per week,Kermit the Frog,Yes,Maybe,Scissors
117,118,9/23/2024 11:35,25,United States,60532,3,4.0,6-8 hours per week,Gonzo the Great,Yes,Maybe,Paper
118,119,9/23/2024 11:36,28,United States,60626,3,4.0,6-8 hours per week,Kermit the Frog,Yes,Maybe,Paper
123,124,9/23/2024 11:36,25,United States of America,60473,3,4.0,More than 8 hours per week,Cooke Monster,Maybe,No,Scissors


In [37]:
df_s2 = df_s2.drop_duplicates()
df_s2

Unnamed: 0,ID,Timestamp,Age,Country,Postal Code,Min Sib,Max Sib,TV_Amount,Muppet,Ghost_Belief,Ghost_Encounter,Rock_Paper_Scissors
0,1,6/21/2024 12:50,42,United States,60189,1,2.0,3-6 hours per week,Rizzo the Rat,Maybe,Maybe,Paper
2,3,6/21/2024 13:35,52,USA,97219,0,0.0,1-3 hours per week,Beaker. A thousand times Beaker.,Maybe,Maybe,Rock
4,5,6/21/2024 13:50,51,United States,55116,1,2.0,More than 8 hours per week,Animal,Yes,Maybe,Rock
5,6,6/21/2024 14:01,28,USA,43207,1,2.0,3-6 hours per week,Rizzo the Rat,Maybe,Maybe,Rock
6,7,6/21/2024 14:02,40,Usa,53704,1,2.0,Less than 1 hour per week,I do not have a favorite Muppet,No,No,Rock
...,...,...,...,...,...,...,...,...,...,...,...,...
116,117,9/23/2024 11:35,24,United States,60647,3,4.0,More than 8 hours per week,Kermit the Frog,Yes,Maybe,Scissors
117,118,9/23/2024 11:35,25,United States,60532,3,4.0,6-8 hours per week,Gonzo the Great,Yes,Maybe,Paper
118,119,9/23/2024 11:36,28,United States,60626,3,4.0,6-8 hours per week,Kermit the Frog,Yes,Maybe,Paper
123,124,9/23/2024 11:36,25,United States of America,60473,3,4.0,More than 8 hours per week,Cooke Monster,Maybe,No,Scissors


In [38]:
df_s2.duplicated()

0      False
2      False
4      False
5      False
6      False
       ...  
116    False
117    False
118    False
123    False
124    False
Length: 113, dtype: bool

In [39]:
filtered_df = df_s2[(df_s2['Ghost_Belief'] == 'Yes')]
filtered_df

Unnamed: 0,ID,Timestamp,Age,Country,Postal Code,Min Sib,Max Sib,TV_Amount,Muppet,Ghost_Belief,Ghost_Encounter,Rock_Paper_Scissors
4,5,6/21/2024 13:50,51,United States,55116,1,2.0,More than 8 hours per week,Animal,Yes,Maybe,Rock
10,11,6/21/2024 14:52,52,United States,55104,1,2.0,More than 8 hours per week,Animal,Yes,Maybe,Paper
14,15,6/21/2024 20:06,42,America,Somewhere in WI,1,2.0,3-6 hours per week,Miss Piggy,Yes,Yes,Scissors
15,16,6/21/2024 20:32,18,United States,54016,1,2.0,3-6 hours per week,Kermit the Frog,Yes,Yes,Paper
16,17,6/21/2024 20:35,44,United States of America,54016,1,2.0,3-6 hours per week,Animal,Yes,Maybe,Rock
17,18,6/21/2024 20:43,8,United States,55102,1,2.0,6-8 hours per week,Kermit the Frog,Yes,Yes,Rock
20,21,6/21/2024 21:03,55,USA,10024,0,0.0,3-6 hours per week,Gonzo the Great,Yes,Yes,Paper
22,23,6/21/2024 23:25,42,United States,55104,1,2.0,1-3 hours per week,Kermit the Frog,Yes,Yes,Paper
23,24,6/22/2024 4:40,70,United States of America,55119,1,2.0,1-3 hours per week,Miss Piggy,Yes,No,Scissors
25,26,6/22/2024 7:08,46,United States of America,55434,3,4.0,More than 8 hours per week,Fozzie Bear,Yes,Yes,Rock


In [40]:
filtered_df.info()

<class 'pandas.core.frame.DataFrame'>
Index: 39 entries, 4 to 118
Data columns (total 12 columns):
 #   Column               Non-Null Count  Dtype  
---  ------               --------------  -----  
 0   ID                   39 non-null     int64  
 1   Timestamp            39 non-null     object 
 2   Age                  39 non-null     int64  
 3   Country              39 non-null     object 
 4   Postal Code          39 non-null     object 
 5   Min Sib              39 non-null     int64  
 6   Max Sib              39 non-null     float64
 7   TV_Amount            39 non-null     object 
 8   Muppet               39 non-null     object 
 9   Ghost_Belief         39 non-null     object 
 10  Ghost_Encounter      39 non-null     object 
 11  Rock_Paper_Scissors  39 non-null     object 
dtypes: float64(1), int64(3), object(8)
memory usage: 4.0+ KB


In [41]:
df_s2.replace({'Country':'United States of America'}, 'United States', inplace=True)
df_s2

Unnamed: 0,ID,Timestamp,Age,Country,Postal Code,Min Sib,Max Sib,TV_Amount,Muppet,Ghost_Belief,Ghost_Encounter,Rock_Paper_Scissors
0,1,6/21/2024 12:50,42,United States,60189,1,2.0,3-6 hours per week,Rizzo the Rat,Maybe,Maybe,Paper
2,3,6/21/2024 13:35,52,USA,97219,0,0.0,1-3 hours per week,Beaker. A thousand times Beaker.,Maybe,Maybe,Rock
4,5,6/21/2024 13:50,51,United States,55116,1,2.0,More than 8 hours per week,Animal,Yes,Maybe,Rock
5,6,6/21/2024 14:01,28,USA,43207,1,2.0,3-6 hours per week,Rizzo the Rat,Maybe,Maybe,Rock
6,7,6/21/2024 14:02,40,Usa,53704,1,2.0,Less than 1 hour per week,I do not have a favorite Muppet,No,No,Rock
...,...,...,...,...,...,...,...,...,...,...,...,...
116,117,9/23/2024 11:35,24,United States,60647,3,4.0,More than 8 hours per week,Kermit the Frog,Yes,Maybe,Scissors
117,118,9/23/2024 11:35,25,United States,60532,3,4.0,6-8 hours per week,Gonzo the Great,Yes,Maybe,Paper
118,119,9/23/2024 11:36,28,United States,60626,3,4.0,6-8 hours per week,Kermit the Frog,Yes,Maybe,Paper
123,124,9/23/2024 11:36,25,United States,60473,3,4.0,More than 8 hours per week,Cooke Monster,Maybe,No,Scissors


In [42]:
df_s2.replace({'Country':'USA'}, 'United States', inplace=True)
df_s2

Unnamed: 0,ID,Timestamp,Age,Country,Postal Code,Min Sib,Max Sib,TV_Amount,Muppet,Ghost_Belief,Ghost_Encounter,Rock_Paper_Scissors
0,1,6/21/2024 12:50,42,United States,60189,1,2.0,3-6 hours per week,Rizzo the Rat,Maybe,Maybe,Paper
2,3,6/21/2024 13:35,52,United States,97219,0,0.0,1-3 hours per week,Beaker. A thousand times Beaker.,Maybe,Maybe,Rock
4,5,6/21/2024 13:50,51,United States,55116,1,2.0,More than 8 hours per week,Animal,Yes,Maybe,Rock
5,6,6/21/2024 14:01,28,United States,43207,1,2.0,3-6 hours per week,Rizzo the Rat,Maybe,Maybe,Rock
6,7,6/21/2024 14:02,40,Usa,53704,1,2.0,Less than 1 hour per week,I do not have a favorite Muppet,No,No,Rock
...,...,...,...,...,...,...,...,...,...,...,...,...
116,117,9/23/2024 11:35,24,United States,60647,3,4.0,More than 8 hours per week,Kermit the Frog,Yes,Maybe,Scissors
117,118,9/23/2024 11:35,25,United States,60532,3,4.0,6-8 hours per week,Gonzo the Great,Yes,Maybe,Paper
118,119,9/23/2024 11:36,28,United States,60626,3,4.0,6-8 hours per week,Kermit the Frog,Yes,Maybe,Paper
123,124,9/23/2024 11:36,25,United States,60473,3,4.0,More than 8 hours per week,Cooke Monster,Maybe,No,Scissors


In [43]:
df_s.head(5)

Unnamed: 0,ID,Timestamp,Age,Country,Postal Code,Min Sib,Max Sib,TV_Amount,Muppet,Ghost_Belief,Ghost_Encounter,Rock_Paper_Scissors
0,1,6/21/2024 12:50,42,United States,60189,1,2.0,3-6 hours per week,Rizzo the Rat,Maybe,Maybe,Paper
1,2,6/21/2024 13:23,33,USA,59086,5,,6-8 hours per week,Gonzo the Great,Maybe,No,Scissors
2,3,6/21/2024 13:35,52,USA,97219,0,0.0,1-3 hours per week,Beaker. A thousand times Beaker.,Maybe,Maybe,Rock
3,4,6/21/2024 13:40,65,United States of America,55118,5,,More than 8 hours per week,Fozzie Bear,Yes,Yes,Paper
4,5,6/21/2024 13:50,51,United States,55116,1,2.0,More than 8 hours per week,Animal,Yes,Maybe,Rock


In [44]:
# This was that pesky .fillna() example. I went back and changed the column reference 
# in the part after the equal sign to use bracket notation as you see below, and now it works. ¯\_(ツ)_/¯


df_s['Max Sib'] = df_s['Max Sib'].fillna('test')

In [45]:
for i in df_s2.index:
    if 'Beaker' in df_s2.loc[i, "Muppet"]:
        df_s2.loc[i, "Muppet"] = "Beaker"

df_s2

Unnamed: 0,ID,Timestamp,Age,Country,Postal Code,Min Sib,Max Sib,TV_Amount,Muppet,Ghost_Belief,Ghost_Encounter,Rock_Paper_Scissors
0,1,6/21/2024 12:50,42,United States,60189,1,2.0,3-6 hours per week,Rizzo the Rat,Maybe,Maybe,Paper
2,3,6/21/2024 13:35,52,United States,97219,0,0.0,1-3 hours per week,Beaker,Maybe,Maybe,Rock
4,5,6/21/2024 13:50,51,United States,55116,1,2.0,More than 8 hours per week,Animal,Yes,Maybe,Rock
5,6,6/21/2024 14:01,28,United States,43207,1,2.0,3-6 hours per week,Rizzo the Rat,Maybe,Maybe,Rock
6,7,6/21/2024 14:02,40,Usa,53704,1,2.0,Less than 1 hour per week,I do not have a favorite Muppet,No,No,Rock
...,...,...,...,...,...,...,...,...,...,...,...,...
116,117,9/23/2024 11:35,24,United States,60647,3,4.0,More than 8 hours per week,Kermit the Frog,Yes,Maybe,Scissors
117,118,9/23/2024 11:35,25,United States,60532,3,4.0,6-8 hours per week,Gonzo the Great,Yes,Maybe,Paper
118,119,9/23/2024 11:36,28,United States,60626,3,4.0,6-8 hours per week,Kermit the Frog,Yes,Maybe,Paper
123,124,9/23/2024 11:36,25,United States,60473,3,4.0,More than 8 hours per week,Cooke Monster,Maybe,No,Scissors


In [46]:
df_s2.info()

<class 'pandas.core.frame.DataFrame'>
Index: 113 entries, 0 to 124
Data columns (total 12 columns):
 #   Column               Non-Null Count  Dtype  
---  ------               --------------  -----  
 0   ID                   113 non-null    int64  
 1   Timestamp            113 non-null    object 
 2   Age                  113 non-null    int64  
 3   Country              113 non-null    object 
 4   Postal Code          113 non-null    object 
 5   Min Sib              113 non-null    int64  
 6   Max Sib              113 non-null    float64
 7   TV_Amount            113 non-null    object 
 8   Muppet               113 non-null    object 
 9   Ghost_Belief         113 non-null    object 
 10  Ghost_Encounter      113 non-null    object 
 11  Rock_Paper_Scissors  113 non-null    object 
dtypes: float64(1), int64(3), object(8)
memory usage: 15.5+ KB


In [47]:
for i in df_s2.index:
    if 'Wakanda' in df_s2.loc[i, "Country"]:
        df_s2.drop(i, inplace=True)

df_s2

Unnamed: 0,ID,Timestamp,Age,Country,Postal Code,Min Sib,Max Sib,TV_Amount,Muppet,Ghost_Belief,Ghost_Encounter,Rock_Paper_Scissors
0,1,6/21/2024 12:50,42,United States,60189,1,2.0,3-6 hours per week,Rizzo the Rat,Maybe,Maybe,Paper
2,3,6/21/2024 13:35,52,United States,97219,0,0.0,1-3 hours per week,Beaker,Maybe,Maybe,Rock
4,5,6/21/2024 13:50,51,United States,55116,1,2.0,More than 8 hours per week,Animal,Yes,Maybe,Rock
5,6,6/21/2024 14:01,28,United States,43207,1,2.0,3-6 hours per week,Rizzo the Rat,Maybe,Maybe,Rock
6,7,6/21/2024 14:02,40,Usa,53704,1,2.0,Less than 1 hour per week,I do not have a favorite Muppet,No,No,Rock
...,...,...,...,...,...,...,...,...,...,...,...,...
116,117,9/23/2024 11:35,24,United States,60647,3,4.0,More than 8 hours per week,Kermit the Frog,Yes,Maybe,Scissors
117,118,9/23/2024 11:35,25,United States,60532,3,4.0,6-8 hours per week,Gonzo the Great,Yes,Maybe,Paper
118,119,9/23/2024 11:36,28,United States,60626,3,4.0,6-8 hours per week,Kermit the Frog,Yes,Maybe,Paper
123,124,9/23/2024 11:36,25,United States,60473,3,4.0,More than 8 hours per week,Cooke Monster,Maybe,No,Scissors


In [48]:
print(df_s2['Muppet'])

0                        Rizzo the Rat
2                               Beaker
4                               Animal
5                        Rizzo the Rat
6      I do not have a favorite Muppet
                    ...               
116                    Kermit the Frog
117                    Gonzo the Great
118                    Kermit the Frog
123                      Cooke Monster
124                             Animal
Name: Muppet, Length: 113, dtype: object


In [49]:
print(df_s2.Muppet)

0                        Rizzo the Rat
2                               Beaker
4                               Animal
5                        Rizzo the Rat
6      I do not have a favorite Muppet
                    ...               
116                    Kermit the Frog
117                    Gonzo the Great
118                    Kermit the Frog
123                      Cooke Monster
124                             Animal
Name: Muppet, Length: 113, dtype: object
