# Handling file formats

## 1. CSV

In [25]:
import pandas as pd
# Load the animals table from a CSV file given by a relative path.
animals = pd.read_csv("animals.csv")
# A bare name on the last line makes the notebook display the DataFrame.
animals

Unnamed: 0,animal_type,sex,name,age,colour
0,cat,female,Arta,2,orange
1,dog,male,Bork,5,brown
2,mouse,male,Jerry,30,brown
3,cat,male,Tom,40,blue
4,rat,female,Lariska,25,grey


In [6]:
# Print the docstring of DataFrame.to_dict to see the available `orient` values.
help(animals.to_dict)

Help on method to_dict in module pandas.core.frame:

to_dict(orient: "Literal['dict', 'list', 'series', 'split', 'tight', 'records', 'index']" = 'dict', into: 'type[dict]' = <class 'dict'>) -> 'dict | list[dict]' method of pandas.core.frame.DataFrame instance
    Convert the DataFrame to a dictionary.
    
    The type of the key-value pairs can be customized with the parameters
    (see below).
    
    Parameters
    ----------
    orient : str {'dict', 'list', 'series', 'split', 'tight', 'records', 'index'}
        Determines the type of the values of the dictionary.
    
        - 'dict' (default) : dict like {column -> {index -> value}}
        - 'list' : dict like {column -> [values]}
        - 'series' : dict like {column -> Series(values)}
        - 'split' : dict like
          {'index' -> [index], 'columns' -> [columns], 'data' -> [values]}
        - 'tight' : dict like
          {'index' -> [index], 'columns' -> [columns], 'data' -> [values],
          'index_names' -> [inde

In [26]:
# Default orientation, spelled out explicitly: {column -> {index -> value}}.
animals_dict = animals.to_dict(orient="dict")
animals_dict

{'animal_type': {0: 'cat', 1: 'dog', 2: 'mouse', 3: 'cat', 4: 'rat'},
 'sex': {0: 'female', 1: 'male', 2: 'male', 3: 'male', 4: 'female'},
 'name': {0: 'Arta', 1: 'Bork', 2: 'Jerry', 3: 'Tom', 4: 'Lariska'},
 'age': {0: 2, 1: 5, 2: 30, 3: 40, 4: 25},
 'colour': {0: 'orange', 1: 'brown', 2: 'brown', 3: 'blue', 4: 'grey'}}

In [27]:
# Column-oriented form: {column -> [values]}.
# Note: this rebinds `animals_dict`, so the name now refers to the list form.
animals_dict = animals.to_dict(orient="list")
animals_dict

{'animal_type': ['cat', 'dog', 'mouse', 'cat', 'rat'],
 'sex': ['female', 'male', 'male', 'male', 'female'],
 'name': ['Arta', 'Bork', 'Jerry', 'Tom', 'Lariska'],
 'age': [2, 5, 30, 40, 25],
 'colour': ['orange', 'brown', 'brown', 'blue', 'grey']}

In [11]:
# Convert the DataFrame to a NumPy record array; the index becomes the first field.
animals.to_records()

rec.array([(0, 'cat', 'female', 'Arta',  2, 'orange'),
           (1, 'dog', 'male', 'Bork',  5, 'brown'),
           (2, 'mouse', 'male', 'Jerry', 30, 'brown'),
           (3, 'cat', 'male', 'Tom', 40, 'blue'),
           (4, 'rat', 'female', 'Lariska', 25, 'grey')],
          dtype=[('index', '<i8'), ('animal_type', 'O'), ('sex', 'O'), ('name', 'O'), ('age', '<i8'), ('colour', 'O')])

## 2. JSON

In [12]:
# Show the docstring of DataFrame.to_json, which converts a DataFrame to a
# JSON-formatted string.
help(animals.to_json)

Help on method to_json in module pandas.core.generic:

to_json(path_or_buf: 'FilePath | WriteBuffer[bytes] | WriteBuffer[str] | None' = None, orient: 'str | None' = None, date_format: 'str | None' = None, double_precision: 'int' = 10, force_ascii: 'bool_t' = True, date_unit: 'str' = 'ms', default_handler: 'Callable[[Any], JSONSerializable] | None' = None, lines: 'bool_t' = False, compression: 'CompressionOptions' = 'infer', index: 'bool_t' = True, indent: 'int | None' = None, storage_options: 'StorageOptions' = None) -> 'str | None' method of pandas.core.frame.DataFrame instance
    Convert the object to a JSON string.
    
    Note NaN's and None will be converted to null and datetime objects
    will be converted to UNIX timestamps.
    
    Parameters
    ----------
    path_or_buf : str, path object, file-like object, or None, default None
        String, path object (implementing os.PathLike[str]), or file-like
        object implementing a write() function. If None, the result is

In [34]:
# orient='index' produces {index -> {column -> value}}: one JSON object per row,
# keyed by the row label. No path is given, so the JSON comes back as a string.
animals_json = animals.to_json(orient='index')
animals_json

'{"0":{"animal_type":"cat","sex":"female","name":"Arta","age":2,"colour":"orange"},"1":{"animal_type":"dog","sex":"male","name":"Bork","age":5,"colour":"brown"},"2":{"animal_type":"mouse","sex":"male","name":"Jerry","age":30,"colour":"brown"},"3":{"animal_type":"cat","sex":"male","name":"Tom","age":40,"colour":"blue"},"4":{"animal_type":"rat","sex":"female","name":"Lariska","age":25,"colour":"grey"}}'

In [1]:
# Creating a JSON-formatted document
import json

In [37]:
# First step: parse the JSON string produced by animals.to_json back into
# Python objects.
json_dict = json.loads(animals_json) # see above cells for animals_json
# The row labels come back as string keys ('0', '1', ...).
json_dict

{'0': {'animal_type': 'cat',
  'sex': 'female',
  'name': 'Arta',
  'age': 2,
  'colour': 'orange'},
 '1': {'animal_type': 'dog',
  'sex': 'male',
  'name': 'Bork',
  'age': 5,
  'colour': 'brown'},
 '2': {'animal_type': 'mouse',
  'sex': 'male',
  'name': 'Jerry',
  'age': 30,
  'colour': 'brown'},
 '3': {'animal_type': 'cat',
  'sex': 'male',
  'name': 'Tom',
  'age': 40,
  'colour': 'blue'},
 '4': {'animal_type': 'rat',
  'sex': 'female',
  'name': 'Lariska',
  'age': 25,
  'colour': 'grey'}}

In [38]:
# Second step: write the dict parsed from the JSON string (`json_dict`) to a
# JSON file. Using `json_dict` keeps this cell tied to the step directly above
# instead of to `animals_dict`, which is rebound by several cells in the CSV
# section and therefore depends on which of them ran last.
# The encoding is given explicitly so the file does not depend on the
# platform's default text encoding.
with open("animals2.json", "w", encoding="utf-8") as f:
    # json.dump serialises the object straight into the open text file.
    json.dump(json_dict, f)

## 3. YAML

In [39]:
import yaml

In [40]:
# Read the house configuration from a YAML file with yaml.safe_load.
# NOTE(review): no encoding is passed to open(), so the platform default is
# used; the loaded data contains non-ASCII text, so confirm the file's
# encoding and consider passing it explicitly.
with open("house_config.yaml") as config_file:
    house_config = yaml.safe_load(config_file)

In [23]:
# Check which Python type the parsed YAML document was loaded as.
type(house_config)

dict

In [24]:
# Display the loaded configuration.
house_config


{'animals': [{'arta': None,
   'name': 'Arta',
   'sex': 'female',
   'type': 'cat',
   'age': 2,
   'colour': 'orange'},
  {'bork': None,
   'name': 'Bork',
   'sex': 'male',
   'type': 'dog',
   'age': 5,
   'colour': 'brown'}],
 'people': [{'mother': None, 'name': 'Lina', 'age': 40},
  {'father': None, 'name': 'John', 'age': 39},
  {'child': None, 'name': 'X-Æ-17', 'age': 2}],
 'address': '75 Magic St, Vilnius, Lithuania',
 'welcome_message': 'Welcome to our house!'}

# Serialization with pickle  
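Serialization is the process of converting an in-memory object into a stream of bytes that can be saved to a file or sent elsewhere and later turned back into an equivalent object (deserialization). Python's `pickle` module does this using a binary format. Only unpickle files you trust: loading a pickle can execute arbitrary code.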

In [42]:
# creating content
class Person:
    """A person with a name and an age who can greet and celebrate birthdays."""

    def __init__(self, name, age):
        """Store the given ``name`` and ``age`` on the instance."""
        self.name, self.age = name, age

    def say_hello(self):
        """Return a greeting string that mentions the person's name and age."""
        greeting = f"Hello! I am {self.name}, and I am {self.age} years old"
        return greeting

    def celebrate_birthday(self):
        """Print a birthday message, add one to ``age``, and print the new age."""
        print("Yahoo! Today is my birthday!")
        # The age is updated in place before the second message is printed,
        # so that message reports the new value.
        self.age += 1
        print(f"Now I am {self.age} years old!")

In [43]:
# Create a Person and exercise its methods.
p = Person("Marta", 5)
# Only the value of a cell's last expression is displayed, so this first
# greeting is computed but not shown.
p.say_hello()
p.celebrate_birthday()
# Last expression of the cell: this greeting is displayed, with the updated age.
p.say_hello()

Yahoo! Today is my birthday!
Now I am 6 years old!


'Hello! I am Marta, and I am 6 years old'

In [44]:
# Serialize the Person instance `p` to a binary pickle file.
import pickle

with open("marta.pkl", mode="wb") as pickle_file:
    pickle.dump(p, pickle_file)

In [46]:
# And reading again
# NOTE: pickle.load can execute arbitrary code while loading, so only use it
# on files you created yourself or otherwise trust.
# NOTE(review): `np` is the conventional alias for numpy; a more descriptive
# name would avoid confusion if numpy is imported in this notebook later.
with open("marta.pkl", "rb") as f:
    np = pickle.load(f)
# Display the restored object.
np

<__main__.Person at 0x1ab46e2e980>

In [47]:
# Print the name and age attributes of the object loaded from the pickle file.
print(np.name, np.age)

Marta 6
