# Pandas basics

- pandas.Series
- pandas.DataFrame
- read_csv
- indexing
- plotting

## Pandas Series

- can create from dictionary
- can create from list
- can create from np.array

In [3]:
import pandas as pd

# Student count per program, keyed by the program's short name.
programs_dict = {"AI": 26, "NET": 38, "JAVA": 30, "UX": 28}

# Building a Series from a dict: the keys become the index labels and the
# dict values become the Series values (insertion order is kept).
programs_series = pd.Series(data=programs_dict)

programs_series

AI      26
NET     38
JAVA    30
UX      28
dtype: int64

In [9]:
# programs_series is indexed by string labels, so positional access must go
# through .iloc. Writing programs_series[0] relies on pandas falling back to
# "integer means position", which is deprecated (FutureWarning since
# pandas 2.1): integer keys are meant to be looked up as labels.
print(f"{programs_series.iloc[0] = }")   # first element by position
print(f"{programs_series.iloc[-1] = }")  # last element by position

# keys() returns the index (the labels). An Index is positional by nature,
# so plain [] with an integer is fine here.
print(f"{programs_series.keys() = }")
print(f"{programs_series.keys()[0] = }")

programs_series[0] = 26
programs_series[-1] = 28
programs_series.keys() = Index(['AI', 'NET', 'JAVA', 'UX'], dtype='object')
programs_series.keys()[0] = 'AI'


In [11]:
import random as rnd

# Fix the seed so the same ten rolls are produced on every run.
rnd.seed(1337)

# Simulate ten rolls of a six-sided die; randint includes both endpoints.
dice_series = pd.Series(
    data=[rnd.randint(1, 6) for _roll in range(10)],
)
dice_series.head()  # shows the first five rows


0    5
1    5
2    6
3    3
4    5
dtype: int64

In [14]:
# Print the full series followed by a handful of summary statistics,
# each on its own line.
print(
    dice_series,
    dice_series.min(),     # smallest roll
    dice_series.argmin(),  # integer position of the (first) smallest roll
    dice_series.max(),     # largest roll
    dice_series.mean(),    # arithmetic mean of the rolls
    dice_series.median(),  # middle value of the sorted rolls
    sep="\n",
)

0    5
1    5
2    6
3    3
4    5
5    5
6    6
7    2
8    3
9    4
dtype: int64
2
7
6
4.4
5.0


## DataFrame

- tabular data with rows and columns
- analogous to a 2D NumPy array, but with flexible row indices and column names
- can also be seen as a "specialized" dictionary that maps each column name to a Series object

In [17]:
# Promote the Series to a one-column DataFrame: the Series index becomes the
# row index and the values go into a column with the given name.
df_programs = programs_series.to_frame(name="Number of students")
df_programs

Unnamed: 0,Number of students
AI,26
NET,38
JAVA,30
UX,28


In [25]:

# Two Series that share the same program labels: one with student counts,
# one with a skill per program.
students = pd.Series({"AI": 26, "NET": 38, "UX": 28, "JAVA": 30})
skills = pd.Series({"AI": "Python", "NET": "C#", "UX": "Figma", "JAVA": "Java"})

# A dict of Series becomes a DataFrame: each key is a column name and the
# Series are lined up on their common index.
df_programs = pd.DataFrame({"Students": students, "Skills": skills})
df_programs

Unnamed: 0,Students,Skills
AI,26,Python
NET,38,C#
UX,28,Figma
JAVA,30,Java


In [26]:
# Selecting a single column by name returns it as a Series that keeps the
# DataFrame's row labels as its index.
df_programs["Students"]

AI      26
NET     38
UX      28
JAVA    30
Name: Students, dtype: int64

In [27]:
# Same column selection for the string column; its dtype is reported as object.
df_programs["Skills"]

AI      Python
NET         C#
UX       Figma
JAVA      Java
Name: Skills, dtype: object

In [29]:
# The column is a Series with string labels. Use .iloc for access by position;
# plain [0] would depend on the deprecated "integer means position" fallback.
# Access by label ("UX") works with plain [].
df_programs["Skills"].iloc[0], df_programs["Skills"]["UX"]

('Python', 'Figma')

## Indexers

- loc
- iloc

In [31]:
# Display the full table as a reference for the indexer examples.
df_programs

Unnamed: 0,Students,Skills
AI,26,Python
NET,38,C#
UX,28,Figma
JAVA,30,Java


In [37]:
# .loc selects by label: the row labelled "AI" is returned as a Series whose
# index consists of the column names.
df_programs.loc["AI"]

Students        26
Skills      Python
Name: AI, dtype: object

In [32]:
# .iloc selects by integer position; the slice end is exclusive, so 1:4
# returns the rows at positions 1, 2 and 3.
df_programs.iloc[1:4]

Unnamed: 0,Students,Skills
NET,38,C#
UX,28,Figma
JAVA,30,Java


## Masking

In [33]:
# Display the full table as a reference for the masking examples.
df_programs

Unnamed: 0,Students,Skills
AI,26,Python
NET,38,C#
UX,28,Figma
JAVA,30,Java


In [34]:
# Comparing a column with a scalar is done element-wise and yields a boolean
# Series (the mask) with the same row labels.
df_programs["Students"] >= 30

AI      False
NET      True
UX      False
JAVA     True
Name: Students, dtype: bool

In [35]:
# Indexing the DataFrame with the boolean mask keeps only the rows where the
# mask is True.
df_programs[df_programs["Students"] >= 30]

Unnamed: 0,Students,Skills
NET,38,C#
JAVA,30,Java
