# Dictionaries & Pandas

## Dictionaries

In [1]:
# Sample list of country names; also reused when building the dictionary below.
countries = [
    "usa",
    "china",
    "russia",
]
# list.index gives the zero-based position of the first matching element.
cou_ind = countries.index("china")
cou_ind

1

In [2]:
# Capitals listed in the same order as `countries`.
capitals = ["Washington", "Pekin", "Moskow"]
# Two keys, each mapping to one of the parallel lists.
d = dict(country=countries, capital=capitals)
print(d)
# keys() returns a view over the dictionary's keys.
print(d.keys())

{'country': ['usa', 'china', 'russia'], 'capital': ['Washington', 'Pekin', 'Moskow']}
dict_keys(['country', 'capital'])


### Loop Dictionary

In [3]:
# items() yields (key, value) pairs, so both can be unpacked in the loop header.
for name, entries in d.items():
    print(f"{name} : {entries}")

country : ['usa', 'china', 'russia']
capital : ['Washington', 'Pekin', 'Moskow']


## Pandas

### Importing

In [4]:
import pandas as pd

### Preparing Data

In [5]:
# Column-oriented sample data: one equally long list per column.
# "Charge" has no value (None) for two of the devices.
my_devices = dict(
    IDs=["0001", "0002", "0003", "0004", "0005"],
    Devices=["Macbook", "Iphone", "Mac Mini", "TV", "Ipad"],
    Charge=[86, 100, None, None, 32],
)
my_devices

{'IDs': ['0001', '0002', '0003', '0004', '0005'],
 'Devices': ['Macbook', 'Iphone', 'Mac Mini', 'TV', 'Ipad'],
 'Charge': [86, 100, None, None, 32]}

### Creating DataFrame

In [6]:
# Each dictionary key becomes a column; each list supplies that column's values.
my_devices_pd = pd.DataFrame(data=my_devices)
my_devices_pd

Unnamed: 0,IDs,Devices,Charge
0,1,Macbook,86.0
1,2,Iphone,100.0
2,3,Mac Mini,
3,4,TV,
4,5,Ipad,32.0


### Setting Row Labels

In [7]:
# One label per row, in row order; labels may repeat ("PC" is used twice).
labels = ["PC", "TP", "PC", "TV", "TA"]
# Swap the existing row index for the custom labels.
my_devices_pd.index = pd.Index(labels)
my_devices_pd

Unnamed: 0,IDs,Devices,Charge
PC,1,Macbook,86.0
TP,2,Iphone,100.0
PC,3,Mac Mini,
TV,4,TV,
TA,5,Ipad,32.0


### Read CSV File

In [8]:
# Load the cars table; index_col=0 uses the first CSV column as the row labels.
cars = pd.read_csv(
    'cars.csv',
    index_col=0,
)
cars

Unnamed: 0,brand,model,power,price
SU,Subaru,Forester,200,10000
MI,Mitsubishi,Lancher Evo,210,10500
DG,Dodge,Challenger,728,35700
CT,Chevrolet,Camaro SS,527,30000
FD,Ford,"""Mustang""",630,33000


### Series

In [9]:
# Single brackets with one column name give back a Series ...
pandas_series = cars["brand"]
# ... while a list of column names gives back a DataFrame.
pandas_dataframe = cars[["brand"]]
for selection in (pandas_series, pandas_dataframe):
    print(selection)

SU        Subaru
MI    Mitsubishi
DG         Dodge
CT     Chevrolet
FD          Ford
Name: brand, dtype: object
         brand
SU      Subaru
MI  Mitsubishi
DG       Dodge
CT   Chevrolet
FD        Ford


### Accessing Data in a DataFrame

In [10]:
# Indexing with a list of column names keeps just those columns.
wanted_columns = ["brand", "price"]
brand_price = cars[wanted_columns]
brand_price

Unnamed: 0,brand,price
SU,Subaru,10000
MI,Mitsubishi,10500
DG,Dodge,35700
CT,Chevrolet,30000
FD,Ford,33000


In [11]:
# An integer slice in plain brackets selects rows by position:
# rows 1, 2 and 3 are returned (the stop position 4 is excluded).
cars[1:4]

Unnamed: 0,brand,model,power,price
MI,Mitsubishi,Lancher Evo,210,10500
DG,Dodge,Challenger,728,35700
CT,Chevrolet,Camaro SS,527,30000


In [12]:
# loc: label based 
print(cars.loc["DG"])
print("---------------")
print(cars.loc[["FD", "CT", "SU"], ["brand", "price"]])
print("---------------")
print(cars.loc[:, ["brand", "price"]])

brand         Dodge
model    Challenger
power           728
price         35700
Name: DG, dtype: object
---------------
        brand  price
FD       Ford  33000
CT  Chevrolet  30000
SU     Subaru  10000
---------------
         brand  price
SU      Subaru  10000
MI  Mitsubishi  10500
DG       Dodge  35700
CT   Chevrolet  30000
FD        Ford  33000


In [13]:
# iloc: integer-position based
print(cars.iloc[1])
print("---------------")
print(cars.iloc[[1, 2, 3], [1, 2]])
print("---------------")
print(cars.iloc[:, [1, 2]])

brand     Mitsubishi
model    Lancher Evo
power            210
price          10500
Name: MI, dtype: object
---------------
          model  power
MI  Lancher Evo    210
DG   Challenger    728
CT    Camaro SS    527
---------------
          model  power
SU     Forester    200
MI  Lancher Evo    210
DG   Challenger    728
CT    Camaro SS    527
FD    "Mustang"    630


### Head & Tail

In [23]:
# head()/tail() return the first/last 5 rows by default.
first_rows = cars.head()
last_rows = cars.tail()

print(first_rows)
print("----------------------------------------------")
print(last_rows)

         brand        model  power  price
SU      Subaru     Forester    200  10000
MI  Mitsubishi  Lancher Evo    210  10500
DG       Dodge   Challenger    728  35700
CT   Chevrolet    Camaro SS    527  30000
FD        Ford    "Mustang"    630  33000
----------------------------------------------
         brand        model  power  price
SU      Subaru     Forester    200  10000
MI  Mitsubishi  Lancher Evo    210  10500
DG       Dodge   Challenger    728  35700
CT   Chevrolet    Camaro SS    527  30000
FD        Ford    "Mustang"    630  33000


### Pandas Memory Usage

In [27]:
# Per-column byte counts (deep=True), summed into a single total.
per_column_bytes = cars.memory_usage(deep=True)
memory_usage_bytes = per_column_bytes.sum()

# 1 MB is taken as 1024 * 1024 bytes.
bytes_per_mb = 1024 ** 2
memory_usage_mb = memory_usage_bytes / bytes_per_mb

print(f"Memory usage of dataframe: {memory_usage_bytes} Bytes")
print(f"Memory usage of dataframe: {memory_usage_mb:.2f} MB")

Memory usage of dataframe: 1199 Bytes
Memory usage of dataframe: 0.00 MB


## Filtering Data

### Creating Example Data

In [15]:
import numpy as np

# Seeded generator so the example data (and every result derived from it,
# such as the age filter) is identical on each Restart & Run All.
rng = np.random.default_rng(42)

# Five samples per measure, drawn from normal distributions (mean, std, size).
# Heights and weights are stored as strings formatted to two decimals;
# ages are truncated to integers.
height_np = [format(num, '.2f') for num in rng.normal(1.70, 0.3, 5)]
weight_np = [format(num, '.2f') for num in rng.normal(60, 20, 5)]
age_np = rng.normal(23, 5, 5).astype(int)
countries = ['USA', 'RUSSIA', 'JAPAN', 'TURKEY', 'GERMANY']
country_labels = ['US', 'RU', 'JP', 'TR', 'GE']

# NOTE: `index=[country_labels]` wraps the labels in an extra list, which makes
# pandas build a one-level MultiIndex. Row labels are therefore 1-tuples such as
# ('US',), which is why the iterrows examples read the code with `lab[0]`.
country_averages = pd.DataFrame({
    'country': countries,
    'age': age_np,
    'height': height_np,
    'weight': weight_np
}, index=[country_labels])

country_averages

Unnamed: 0,country,age,height,weight
US,USA,26,1.29,60.13
RU,RUSSIA,25,1.62,52.99
JP,JAPAN,22,1.81,17.29
TR,TURKEY,16,1.4,75.17
GE,GERMANY,24,1.97,47.81


### Filtering

In [16]:
# Build one boolean mask per condition, then combine them element-wise.
ages = country_averages['age']
at_least_18 = ages >= 18
at_most_25 = ages <= 25
selected_data = np.logical_and(at_least_18, at_most_25)

# Indexing with the boolean mask keeps only the rows where it is True.
country_averages[selected_data]

Unnamed: 0,country,age,height,weight
RU,RUSSIA,25,1.62,52.99
JP,JAPAN,22,1.81,17.29
GE,GERMANY,24,1.97,47.81


### Loop Pandas DataFrame

In [17]:
# Looping over a DataFrame yields its column names, not its rows.
for column_name in country_averages.columns:
    print(column_name)

country
age
height
weight


### Iterrows

In [18]:
# iterrows() yields (row label, row as a Series) pairs, one per row.
for label, record in country_averages.iterrows():
    print(label, record, sep="\n")

('US',)
country      USA
age           26
height      1.29
weight     60.13
Name: (US,), dtype: object
('RU',)
country    RUSSIA
age            25
height       1.62
weight      52.99
Name: (RU,), dtype: object
('JP',)
country    JAPAN
age           22
height      1.81
weight     17.29
Name: (JP,), dtype: object
('TR',)
country    TURKEY
age            16
height       1.40
weight      75.17
Name: (TR,), dtype: object
('GE',)
country    GERMANY
age             24
height        1.97
weight       47.81
Name: (GE,), dtype: object


#### Selected Data

In [19]:
# Print "<label>: <height>" for every row.
# The row label arrives as a 1-tuple (e.g. ('US',)), so lab[0] extracts the code.
# An f-string is used instead of "+" concatenation so the line does not depend
# on the height column being stored as strings.
for lab, row in country_averages.iterrows():
    print(f"{lab[0]}: {row['height']}")

US: 1.29
RU: 1.62
JP: 1.81
TR: 1.40
GE: 1.97


#### Add Column

In [20]:
# Add a column one row at a time: look up each row by its label and store the
# length of the country name. In the recorded output the column filled this
# way shows float values (3.0, 6.0, ...).
for label, record in country_averages.iterrows():
    name_size = len(record["country"])
    country_averages.loc[label, "name_length"] = name_size

country_averages

Unnamed: 0,country,age,height,weight,name_length
US,USA,26,1.29,60.13,3.0
RU,RUSSIA,25,1.62,52.99,6.0
JP,JAPAN,22,1.81,17.29,5.0
TR,TURKEY,16,1.4,75.17,6.0
GE,GERMANY,24,1.97,47.81,7.0


#### Apply

In [21]:
# apply() calls len on every value of the "country" column in one step,
# replacing the explicit row-by-row loop.
name_lengths = country_averages["country"].apply(len)
country_averages["name_length"] = name_lengths
country_averages

Unnamed: 0,country,age,height,weight,name_length
US,USA,26,1.29,60.13,3
RU,RUSSIA,25,1.62,52.99,6
JP,JAPAN,22,1.81,17.29,5
TR,TURKEY,16,1.4,75.17,6
GE,GERMANY,24,1.97,47.81,7


# Additional Notes

In this section, I keep notes about pandas features that are not covered in the main course.

### And, Or

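Combine boolean conditions with the `&` (and) and `|` (or) operators, wrapping each condition in parentheses. NumPy's `np.logical_and` / `np.logical_or` can be used instead.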

In [None]:
# Small stand-in dataset so the example runs on its own; an empty DataFrame has
# no "release_year" column and the filter below would raise a KeyError.
netflix_df = pd.DataFrame({
    "title": ["Movie A", "Movie B", "Movie C", "Movie D", "Movie E"],
    "release_year": [1985, 1990, 1995, 1999, 2004],
})

# Combine the two element-wise conditions with `&`. Each comparison needs its
# own parentheses because `&` binds more tightly than `>=` and `<=`.
movies1990_1999 = netflix_df[
    (netflix_df["release_year"] >= 1990)
    & (netflix_df["release_year"] <= 1999)
]
movies1990_1999