# Pandas series and dataframe

In [34]:
import pandas as pd

students_data = dict(business = 25, AI = 30, JS = 30, JAVA = 27)

students_data

{'business': 25, 'AI': 30, 'JS': 30, 'JAVA': 27}

In [35]:
series_program = pd.Series(students_data)
series_program

business    25
AI          30
JS          30
JAVA        27
dtype: int64

In [36]:
print(series_program)

business    25
AI          30
JS          30
JAVA        27
dtype: int64


In [37]:
series_program.iloc[0], series_program.iloc[-1] # första och sista värdet

(np.int64(25), np.int64(27))

In [38]:
series_program.keys() # index blir keys

Index(['business', 'AI', 'JS', 'JAVA'], dtype='object')

In [39]:
series_program["AI"] # dunder repr av series_program 

np.int64(30)

In [40]:
print(series_program["AI"])

30


**Another series using list**

In [41]:
import random as rnd

rnd.seed(42)

dice_list = [rnd.randint(1,6) for _ in range(5)]
dice_list

[6, 1, 1, 6, 3]

In [42]:
dice_series = pd.Series(dice_list)
dice_series

0    6
1    1
2    1
3    6
4    3
dtype: int64

In [43]:
dice_series.min(), dice_series.max(), dice_series.mean()

(np.int64(1), np.int64(6), np.float64(3.4))

## Dataframe

- analog of 2d numpy array with flexible row indices and col names

In [51]:
series_program

business    25
AI          30
JS          30
JAVA        27
dtype: int64

In [52]:
df_programs = pd.DataFrame(series_program, columns=("Num students",)) # komma behövs
df_programs

Unnamed: 0,Num students
business,25
AI,30
JS,30
JAVA,27


In [53]:
# create 2 series objects using dictionary
students = pd.Series(dict(AI=25, NET=30, APP=30, Java=27))
language = pd.Series(dict(AI="Pyhton", NET="C#", APP="Kotlin", Java="Java"))

students

AI      25
NET     30
APP     30
Java    27
dtype: int64

In [54]:
language

AI      Pyhton
NET         C#
APP     Kotlin
Java      Java
dtype: object

In [57]:
df_programs = pd.DataFrame({"Students": students, "Language": language}) # Skapar en DF av 2 series
df_programs

Unnamed: 0,Students,Language
AI,25,Pyhton
NET,30,C#
APP,30,Kotlin
Java,27,Java


In [47]:
import numpy as np

pd.DataFrame({"Students": np.array((25, 30, 30, 27)),
              "Language" : ["Python", "C#", "Kotlin", "Java"],
             },
    index = ["AI", ".NET", "APP", "Java"], # Måste vara utanför dicten
    )




Unnamed: 0,Students,Language
AI,25,Python
.NET,30,C#
APP,30,Kotlin
Java,27,Java


In [58]:
df_programs.index

Index(['AI', 'NET', 'APP', 'Java'], dtype='object')

## Data selection

In [59]:
df_programs["Students"]

AI      25
NET     30
APP     30
Java    27
Name: Students, dtype: int64

In [61]:
df_programs[["Language", "Students"]] # Måste vara 2st[[]] när man väljer flera (?)

Unnamed: 0,Language,Students
AI,Pyhton,25
NET,C#,30
APP,Kotlin,30
Java,Java,27


In [62]:
df_programs.Language # Använd helst inte

AI      Pyhton
NET         C#
APP     Kotlin
Java      Java
Name: Language, dtype: object

In [63]:
df_programs["Language"]["NET"]

'C#'

## Indexers

In [64]:
df_programs.loc["Java"] # .loc lokaliserar indexet "Java"

Students      27
Language    Java
Name: Java, dtype: object

In [66]:
try:
    df_programs[["Java", "APP"]] # Letar efter kolumner, därav felmeddelande JAVA och APP är index
except KeyError as err:
    print(err)

"None of [Index(['Java', 'APP'], dtype='object')] are in the [columns]"


In [67]:
df_programs.loc["AI":"APP"] # Blir som slicing från AI till APP (.loc går på index)

Unnamed: 0,Students,Language
AI,25,Pyhton
NET,30,C#
APP,30,Kotlin


In [68]:
df_programs.iloc[1:3] # Slicing. .iloc går på indexnumret istället för index namnet

Students    30
Language    C#
Name: NET, dtype: object

## Masking

In [69]:
df_programs["Students"] > 25

AI      False
NET      True
APP      True
Java     True
Name: Students, dtype: bool

In [70]:
df_programs[df_programs["Students"] > 25] # Boolean mask, returnerar alla där > 25 är True

Unnamed: 0,Students,Language
NET,30,C#
APP,30,Kotlin
Java,27,Java


In [71]:
df_programs.query("Students > 25") # samma som ovan men använder query metoden istället

Unnamed: 0,Students,Language
NET,30,C#
APP,30,Kotlin
Java,27,Java
