In [10]:
from pathlib import Path
import pandas as pd

# Pracujemy z plikami - JSON

## Czym jest JSON?

JSON (JavaScript Object Notation) to format wymiany danych, który jest łatwy do odczytu i pisania przez ludzi, a także łatwy do analizy i generowania przez komputery. 

In [11]:
with open("simple.json", "w") as f:
    f.write("""
[
    {
        "name": "John",
        "age": 30,
        "cars": 0
    },
    {
        "name": "Jane",
        "age": 25,
        "cars": 1
    }
]
""")

In [12]:
df = pd.read_json("simple.json")
df

Unnamed: 0,name,age,cars
0,John,30,0
1,Jane,25,1


## Jak pracować z zagnieżdżonymi danymi w JSON?

In [1]:
with open("nested.json", "w") as f:
    f.write("""
[
    {
        "name": "John",
        "age": 30,
        "car": {
            "model": "Toyota",
            "year": 2000
        }
    },
    {
        "name": "Jane",
        "age": 25,
        "car": {
            "model": "Ford",
            "year": 2015
        }
    }
]
""")

## moje próby

In [14]:
with open("moj.json", "w") as g:
    g.write("""
[
    {
    "marka": "Audi",
    "model": "Q3 Sportback",
    "wersje": {
        "Active": 120800, 
        "Executive": 140400,
        "S": 199000
        }
        },
        {
    "marka": "TOYOTA",
    "model": "C-HR",
    "wersje": {
        "Comfort": 120800,
        "Style": 140400,
        "GR Sport": 199000
        }
    }
]
""")

In [15]:
df = pd.read_json("moj.json")
df

Unnamed: 0,marka,model,wersje
0,Audi,Q3 Sportback,"{'Active': 120800, 'Executive': 140400, 'S': 1..."
1,TOYOTA,C-HR,"{'Comfort': 120800, 'Style': 140400, 'GR Sport..."


In [16]:
#tu trzeba jakoś inaczej to ugryźć
df1[["wersje", "cena_wersji"]] = pd.json_normalize(df1["wersje"])
df1.drop(columns=["wersje"], inplace=True)
df1

NameError: name 'df1' is not defined

In [17]:
df = pd.read_json("nested.json")
df

Unnamed: 0,name,age,car
0,John,30,"{'model': 'Toyota', 'year': 2000}"
1,Jane,25,"{'model': 'Ford', 'year': 2015}"


In [9]:
df[["car_model", "car_year"]] = pd.json_normalize(df["car"])
df.drop(columns=["car"], inplace=True)
df

NameError: name 'pd' is not defined

## Jak wczytywać daty z pliku JSON?

In [None]:
with open("with_dates.json", "w") as f:
    f.write("""
[
    {
        "name": "John",
        "age": 30,
        "date of birth": "1990-01-01",
        "visit date": "2020-01-01T12:00:00"
    },
    {
        "name": "Jane",
        "age": 25,
        "date of birth": "1995-01-01",
        "visit date": "2020-01-01T13:00:00"
    }
]
""")

In [None]:
df = pd.read_json("with_dates.json")
df

In [None]:
df.info()

In [None]:
df = pd.read_json("with_dates.json", convert_dates=["date of birth", "visit date"])
df

In [None]:
df.info()

In [None]:
df["date of birth"].dt.year

## Wczytywanie JSON-a ze ścieżki 

In [None]:
# wyobraźmy sobie, że plik z danymi żyje w jakiejś strukturze katalogów
# tworzymy więc zmienną DATA_PATH, która przechowuje ścieżkę do katalogu z danymi
DATA_PATH = Path(".").absolute()

# dzięki temu możemy łatwo odwoływać się do plików wewnątrz tego katalogu
data_path = DATA_PATH / "with_dates.json"

df = pd.read_json(data_path, convert_dates=["date of birth", "visit date"])
df