# Pandas Series and Dataframe

In [139]:
import pandas as pd

students_data = dict(business = 25, AI = 30, JS = 30, Java = 27 )
students_data

{'business': 25, 'AI': 30, 'JS': 30, 'Java': 27}

In [140]:
series_program = pd.Series(data=students_data)
series_program

business    25
AI          30
JS          30
Java        27
dtype: int64

In [141]:
series_program.iloc[0],series_program.iloc[-1]
# index

(np.int64(25), np.int64(27))

In [142]:
series_program.keys()

Index(['business', 'AI', 'JS', 'Java'], dtype='object')

In [143]:
series_program["AI"]

np.int64(30)

In [144]:
print(series_program["AI"])

30


In [145]:
series_program["AI"] + 50

np.int64(80)

**another series using list**

In [146]:
import random as rnd

rnd.seed(42)

dice_list = [rnd.randint(1,6) for _ in range(5)]
dice_list

[6, 1, 1, 6, 3]

In [147]:
dice_series = pd.Series(dice_list)
dice_series

0    6
1    1
2    1
3    6
4    3
dtype: int64

In [148]:
dice_series.min()
dice_series.max()
dice_series.mean()

np.float64(3.4)

## DATAFRAME 

analog of 2D numpy array with flexible row indices and col names


In [149]:
series_program

business    25
AI          30
JS          30
Java        27
dtype: int64

In [150]:
df_programs = pd.DataFrame(series_program, columns=["Students", "Language"])
print(df_programs)


ValueError: Shape of passed values is (4, 1), indices imply (4, 2)

In [135]:
#create 2 series objects using dictionary
students = pd.Series(dict(AI = 25, NET = 30, APP = 30, JAVA = 27))
language = pd.Series(dict(AI = "Python", NET = "C#", APP = "Kotlin", JAVA = "Java"))

students


AI      25
NET     30
APP     30
JAVA    27
dtype: int64

In [136]:
language

AI      Python
NET         C#
APP     Kotlin
JAVA      Java
dtype: object

In [137]:
df_programs = pd.DataFrame({"Students": students, "Language" : language})
df_programs

Unnamed: 0,Students,Language
AI,25,Python
NET,30,C#
APP,30,Kotlin
JAVA,27,Java


In [138]:
import numpy as np

pd.DataFrame(
    {
    "Students": np.array((25,30,30,27)),
    "Language": ["Python", "C#", "Kotlin", "Java"],
   },
    index=["AI", ".NET","APP", "Java"]
)     


Unnamed: 0,Students,Language
AI,25,Python
.NET,30,C#
APP,30,Kotlin
Java,27,Java


In [70]:
df_programs.index

Index(['business', 'AI', 'JS', 'Java'], dtype='object')

##Data Selection##

In [126]:
df_programs["Students"]

business    25
AI          30
JS          30
Java        27
Name: Students, dtype: int64

In [151]:
df_programs["Language"]

AI      Python
NET         C#
APP     Kotlin
JAVA      Java
Name: Language, dtype: object

In [152]:
df_programs[["Language", "Students"]]

Unnamed: 0,Language,Students
AI,Python,25
NET,C#,30
APP,Kotlin,30
JAVA,Java,27


In [154]:
df_programs.Language

AI      Python
NET         C#
APP     Kotlin
JAVA      Java
Name: Language, dtype: object

In [155]:
df_programs["Language"]["NET"]

'C#'

##INDEXERS loc iloc ##

In [157]:
df_programs.loc["JAVA"]

Students      27
Language    Java
Name: JAVA, dtype: object

In [158]:
df_programs.loc["AI"]

Students        25
Language    Python
Name: AI, dtype: object

In [164]:
try:
    df_programs[["JAVA", "APP"]]
except KeyError as err:
    print(err)

"None of [Index(['JAVA', 'APP'], dtype='object')] are in the [columns]"


In [165]:
df_programs.loc["AI": "APP"]

Unnamed: 0,Students,Language
AI,25,Python
NET,30,C#
APP,30,Kotlin


In [166]:
df_programs.iloc[1:3]

Unnamed: 0,Students,Language
NET,30,C#
APP,30,Kotlin


**MASKING**

In [167]:
df_programs["Students"] > 25

AI      False
NET      True
APP      True
JAVA     True
Name: Students, dtype: bool

In [168]:
df_programs[df_programs["Students"]> 25]

Unnamed: 0,Students,Language
NET,30,C#
APP,30,Kotlin
JAVA,27,Java


In [169]:
df_programs.query("Students > 25")

Unnamed: 0,Students,Language
NET,30,C#
APP,30,Kotlin
JAVA,27,Java
