### Git 


In [None]:
"""
Our main agenda is to upload/push to GitHub.

Steps before pushing
- Make sure Git is initialised 

        git init 
- Stage our changes
    
        git add <argument>

        #change for notebook alone
        git add robot.ipynb

        #track all the files
        git add .

- Taking the snapshot with a label
        git commit -m 'First Commit'

- Uploading to GitHub
        git push origin <name of your branch>
    
"""

# Link the local repository to GitHub once, before the first push; "origin" is the
# remote name that `git push origin <branch>` refers to.
" git remote add origin <url> "

### Python File I/O
Python's built-in ways of opening and loading files (input and output).

**open()** is Python's built-in function for opening files.
```
Syntax:

open(filename, mode)
```

**Mode**
* 'r'  ---> read only (the file must already exist)
* 'w'  ---> write only (if the file exists it is overwritten; if it does not exist, a new file is created)
* 'r+' ---> read and write (the file must already exist)
* 'a'  ---> append only (new data is written at the end of the file without erasing the existing contents)


In [10]:
# The usual way to load a CSV file: pandas parses it into a DataFrame.
import pandas as pd

# The path is relative to the notebook's working directory.
df = pd.read_csv('sample.csv')

# Preview the first rows of the DataFrame.
df.head()

Unnamed: 0,A,B,C,D,E
0,1,2,3,4,5
1,1,2,3,4,5
2,1,2,3,4,5


In [5]:
# Open the file by hand in read-only mode; the handle stays open until close() is called on it.
x = open('./sample.csv', 'r')

# read() returns the whole file as a single string, newlines included.
x.read()


'A,B,C,D,E\n1,2,3,4,5\n1,2,3,4,5\n1,2,3,4,5'

In [6]:
# Release the handle created with open(); without `with`, closing is the caller's job.
x.close()

## 'with'
The `with` statement closes the file automatically when its block ends, so no separate `close()` call is needed after `open()`.

In [8]:
# The file is closed as soon as the with-block exits.
with open('sample.csv', 'r') as csv_file:
    print(csv_file.read())

A,B,C,D,E
1,2,3,4,5
1,2,3,4,5
1,2,3,4,5


In [16]:
import csv

# DictReader takes the column names from the first line of the file and yields
# one dict per remaining row, keyed by those names.
with open('sample.csv') as f:
    reader = csv.DictReader(f)
    # NOTE(review): earlier cells display sample.csv with the header A,B,C,D,E, while
    # this loop looks up 'Amsterdam' and 'Berlin'. Confirm which version of the file
    # is intended -- against the A..E header these lookups raise KeyError.
    for row in reader:
        print(row['Amsterdam'], row['Berlin'])

100 266
122 256
131 234


In [11]:
type(df)

pandas.core.frame.DataFrame

Create a new file and write to it.

In [20]:
# Mode 'w' creates cities.txt when it is missing and overwrites it when it exists.
with open('cities.txt', 'w') as cities_file:
    cities_file.write('Amsterdam | Berlin | Copenhagen')

In [21]:
# Opening in 'w' mode again replaces whatever cities.txt held before, so only this
# line is left in the file afterwards.
# NOTE(review): switch to mode 'a' if the intent was to add to the existing line.
with open('cities.txt', 'w') as cities_file:
    cities_file.write('Dallas | London | Cairo')

## JSON
JSON stands for `JavaScript Object Notation`.

In [3]:
import json

# Parse playlist.json into Python objects; json.load reads directly from the open file.
with open('playlist.json', 'r') as playlist_file:
    data = json.load(playlist_file)

# Look up a top-level value by its key.
data['playlist_name']

'My_Jams'

In [3]:
type(data['tracks'])

list

In [13]:
# Load the playlist again and check which Python type json.load produced for it.
with open('playlist.json', 'r') as playlist_file:
    data = json.load(playlist_file)

type(data)

dict

In [None]:
# Not recommended: read() returns the raw text, so the whole document ends up as
# one string instead of nested Python objects.
with open('playlist.json', 'r') as playlist_file:
    dt = playlist_file.read()

dt

'{\n  "playlist_name": "My_Jams",\n  "created_by": "Chacha",\n  "tracks": [\n    {\n      "title": "Stan",\n      "artist": "Eminem",\n      "album": "The Marshall Mathers LP",\n      "duration_sec": 405\n    },\n    {\n      "title": "In Da Club",\n      "artist": "50 Cent",\n      "album": "Get Rich or Die Tryin",\n      "duration_sec": 240\n    },\n    {\n      "title": "Suzanna",\n      "artist": "Sauti Sol",\n      "album": "Midnight Train",\n      "duration_sec": 239\n    },\n    {\n      "title": "Many Men",\n      "artist": "50 Cent",\n      "album": "Get Rich or Die Tryin",\n      "duration_sec": 256\n    },\n    {\n      "title": "Bad Liar",\n      "artist": "Imagine Dragons",\n      "album": "Origins",\n      "duration_sec": 243\n    }\n  ]\n}\n'

In [None]:
type(dt)


str

## Exploring JSON
Now that we've loaded it, let's check what it holds.

In [14]:
data.keys()

dict_keys(['playlist_name', 'created_by', 'tracks'])

In [15]:
data.items()

dict_items([('playlist_name', 'My_Jams'), ('created_by', 'Chacha'), ('tracks', [{'title': 'Stan', 'artist': 'Eminem', 'album': 'The Marshall Mathers LP', 'duration_sec': 405}, {'title': 'In Da Club', 'artist': '50 Cent', 'album': 'Get Rich or Die Tryin', 'duration_sec': 240}, {'title': 'Suzanna', 'artist': 'Sauti Sol', 'album': 'Midnight Train', 'duration_sec': 239}, {'title': 'Many Men', 'artist': '50 Cent', 'album': 'Get Rich or Die Tryin', 'duration_sec': 256}, {'title': 'Bad Liar', 'artist': 'Imagine Dragons', 'album': 'Origins', 'duration_sec': 243}])])

In [16]:
data.values()

dict_values(['My_Jams', 'Chacha', [{'title': 'Stan', 'artist': 'Eminem', 'album': 'The Marshall Mathers LP', 'duration_sec': 405}, {'title': 'In Da Club', 'artist': '50 Cent', 'album': 'Get Rich or Die Tryin', 'duration_sec': 240}, {'title': 'Suzanna', 'artist': 'Sauti Sol', 'album': 'Midnight Train', 'duration_sec': 239}, {'title': 'Many Men', 'artist': '50 Cent', 'album': 'Get Rich or Die Tryin', 'duration_sec': 256}, {'title': 'Bad Liar', 'artist': 'Imagine Dragons', 'album': 'Origins', 'duration_sec': 243}]])

In [17]:
#accessing the third song
data['tracks'][2]['title']

'Suzanna'

In [18]:
data['tracks']

[{'title': 'Stan',
  'artist': 'Eminem',
  'album': 'The Marshall Mathers LP',
  'duration_sec': 405},
 {'title': 'In Da Club',
  'artist': '50 Cent',
  'album': 'Get Rich or Die Tryin',
  'duration_sec': 240},
 {'title': 'Suzanna',
  'artist': 'Sauti Sol',
  'album': 'Midnight Train',
  'duration_sec': 239},
 {'title': 'Many Men',
  'artist': '50 Cent',
  'album': 'Get Rich or Die Tryin',
  'duration_sec': 256},
 {'title': 'Bad Liar',
  'artist': 'Imagine Dragons',
  'album': 'Origins',
  'duration_sec': 243}]

In [19]:
data['tracks'][2]

{'title': 'Suzanna',
 'artist': 'Sauti Sol',
 'album': 'Midnight Train',
 'duration_sec': 239}

In [20]:
data['tracks'][2]['title']

'Suzanna'

In [21]:
## example 2
data['tracks']

[{'title': 'Stan',
  'artist': 'Eminem',
  'album': 'The Marshall Mathers LP',
  'duration_sec': 405},
 {'title': 'In Da Club',
  'artist': '50 Cent',
  'album': 'Get Rich or Die Tryin',
  'duration_sec': 240},
 {'title': 'Suzanna',
  'artist': 'Sauti Sol',
  'album': 'Midnight Train',
  'duration_sec': 239},
 {'title': 'Many Men',
  'artist': '50 Cent',
  'album': 'Get Rich or Die Tryin',
  'duration_sec': 256},
 {'title': 'Bad Liar',
  'artist': 'Imagine Dragons',
  'album': 'Origins',
  'duration_sec': 243}]

In [22]:
data['tracks'][-1]

{'title': 'Bad Liar',
 'artist': 'Imagine Dragons',
 'album': 'Origins',
 'duration_sec': 243}

In [None]:
data['tracks'][-1]['album']

'Origins'

## Transforming JSON Schema
Here we want to transform JSON into a flatter structure, which we can use for analysis.

In [4]:
"""
Load the JSON File,
iterate over the tracks and create a new dictionary from the data points

'artist':['Eminem', '50 Cent', 'Sauti Sol', '50 Cent', 'Imagine Dragons']

"""
# Keep a direct reference to the list of track dicts; later cells iterate over it.
# The records are not uniform: in the output shown for this cell the first track has
# an extra 'features' key and the last one has 'writer' in place of 'artist'.
tracks = data['tracks']

tracks


[{'title': 'Stan',
  'artist': 'Eminem',
  'album': 'The Marshall Mathers LP',
  'duration_sec': 405,
  'features': 'Jay Z'},
 {'title': 'In Da Club',
  'artist': '50 Cent',
  'album': 'Get Rich or Die Tryin',
  'duration_sec': 240},
 {'title': 'Suzanna',
  'artist': 'Sauti Sol',
  'album': 'Midnight Train',
  'duration_sec': 239},
 {'title': 'Many Men',
  'artist': '50 Cent',
  'album': 'Get Rich or Die Tryin',
  'duration_sec': 256},
 {'title': 'Bad Liar',
  'writer': 'Imagine Dragons',
  'album': 'Origins',
  'duration_sec': 243}]

In [33]:
# Build one flat record per track, with analysis-friendly column names.
#
# 'artist' is read with .get() because the track records are not uniform: the track
# list displayed earlier in this notebook contains an entry without an 'artist' key,
# and track['artist'] raises KeyError on it. Such tracks get ARTIST = None.
flattened_tracks = []

for track in tracks:
    duration_sec = track['duration_sec']
    flattened_tracks.append({
        'TITLE': track['title'],
        'ARTIST': track.get('artist'),
        'ALBUM': track['album'],
        'LENGTH OF SONG': duration_sec,
        # Same duration expressed in minutes, rounded to two decimals.
        'LENGTH IN MIN': round(duration_sec / 60, 2),
    })

flattened_tracks

[{'TITLE': 'Stan',
  'ARTIST': 'Eminem',
  'ALBUM': 'The Marshall Mathers LP',
  'LENGTH OF SONG': 405,
  'LENGTH IN MIN': 6.75},
 {'TITLE': 'In Da Club',
  'ARTIST': '50 Cent',
  'ALBUM': 'Get Rich or Die Tryin',
  'LENGTH OF SONG': 240,
  'LENGTH IN MIN': 4.0},
 {'TITLE': 'Suzanna',
  'ARTIST': 'Sauti Sol',
  'ALBUM': 'Midnight Train',
  'LENGTH OF SONG': 239,
  'LENGTH IN MIN': 3.98},
 {'TITLE': 'Many Men',
  'ARTIST': '50 Cent',
  'ALBUM': 'Get Rich or Die Tryin',
  'LENGTH OF SONG': 256,
  'LENGTH IN MIN': 4.27},
 {'TITLE': 'Bad Liar',
  'ARTIST': 'Imagine Dragons',
  'ALBUM': 'Origins',
  'LENGTH OF SONG': 243,
  'LENGTH IN MIN': 4.05}]

In [52]:
import pandas as pd

# Read playlist.json into a dict.
with open('playlist.json', 'r') as playlist_file:
    playlist = json.load(playlist_file)

# One row per track, one column per key found in any track.
df = pd.DataFrame(playlist['tracks'])

df.head()

# CSV export is currently disabled:
# df.to_csv('tracs.csv', index=True)

Unnamed: 0,title,artist,album,duration_sec,features,writer
0,Stan,Eminem,The Marshall Mathers LP,405,Jay Z,
1,In Da Club,50 Cent,Get Rich or Die Tryin,240,,
2,Suzanna,Sauti Sol,Midnight Train,239,,
3,Many Men,50 Cent,Get Rich or Die Tryin,256,,
4,Bad Liar,,Origins,243,,Imagine Dragons


In [None]:
df.describe()

Unnamed: 0,duration_sec
count,5.0
mean,276.6
std,72.099237
min,239.0
25%,240.0
50%,243.0
75%,256.0
max,405.0


In [34]:
import numpy as np

# Quick check of np.mean on a small list: (1 + 2 + 3 + 5 + 5) / 5 = 3.2
np.mean([1,2, 3,5, 5])



np.float64(3.2)

In [None]:
list(data['tracks'][0].keys())

['title', 'artist', 'album', 'duration_sec']

In [5]:
# Reduced flattening step: keep only the title, the album and the duration in
# minutes (rounded to two decimals) for every track.
flattened_tracks = [
    {
        'TITLE': song['title'],
        'ALBUM': song['album'],
        'LENGTH IN MIN': round(song['duration_sec'] / 60, 2),
    }
    for song in tracks
]

flattened_tracks

[{'TITLE': 'Stan', 'ALBUM': 'The Marshall Mathers LP', 'LENGTH IN MIN': 6.75},
 {'TITLE': 'In Da Club',
  'ALBUM': 'Get Rich or Die Tryin',
  'LENGTH IN MIN': 4.0},
 {'TITLE': 'Suzanna', 'ALBUM': 'Midnight Train', 'LENGTH IN MIN': 3.98},
 {'TITLE': 'Many Men',
  'ALBUM': 'Get Rich or Die Tryin',
  'LENGTH IN MIN': 4.27},
 {'TITLE': 'Bad Liar', 'ALBUM': 'Origins', 'LENGTH IN MIN': 4.05}]

In [7]:
import pandas as pd

# Turn the list of flat dicts into a table (one row per track) and preview it.
pd.DataFrame(flattened_tracks).head()

Unnamed: 0,TITLE,ALBUM,LENGTH IN MIN
0,Stan,The Marshall Mathers LP,6.75
1,In Da Club,Get Rich or Die Tryin,4.0
2,Suzanna,Midnight Train,3.98
3,Many Men,Get Rich or Die Tryin,4.27
4,Bad Liar,Origins,4.05


In [10]:
import json
import csv


# Load the playlist from disk.
with open('playlist.json', 'r') as f:
    data = json.load(f)

# Start from the expected columns, then add any other key that appears in a track.
# csv.DictWriter raises ValueError when a row has a key that is not in `fieldnames`,
# and the tracks in this notebook are not uniform (some carry 'features' or 'writer').
fieldnames = ['title', 'artist', 'album', 'duration_sec']
for track in data['tracks']:
    for key in track:
        if key not in fieldnames:
            fieldnames.append(key)

# Write a header plus one row per track. A track that lacks one of the columns gets
# an empty cell there. newline="" lets the csv module control line endings itself.
with open('playlist.csv', 'w', newline="") as f:
    writer = csv.DictWriter(f, fieldnames=fieldnames)
    writer.writeheader()
    writer.writerows(data['tracks'])


