# Writing Files - Tasks

In [2]:
import json
from urllib.request import urlopen

import pandas as pd

In [2]:
# read the file containing the elements, remove the comments and write to a new file
# comment='#' makes pandas discard everything from '#' to the end of each line
data = pd.read_csv('data/301-commented.csv', comment='#')
# index=False: the row numbers pandas generated on load are not part of the data,
# so they must not be written out as an extra unnamed first column
data.to_csv('data/301.csv', index=False)

In [5]:
# convert a PDB to XYZ file
# read PDB file and extract atom positions - note that we won't use pd.read_fwf in this example
#
# PDB is a fixed-width format, so every field is sliced out by its column position
# instead of being split on whitespace: neighbouring fields may touch (wide or negative
# numbers) and optional fields may be blank, and either case shifts the token positions
# that a whitespace split would produce.
# ATOM record columns (1-based): x = 31-38, y = 39-46, z = 47-54, element = 77-78
# NOTE(review): assumes data/301-dna.pdb follows the standard column layout - confirm
atom_records = []
with open('data/301-dna.pdb', 'r') as f:
    # iterate over the handle directly so the whole file is never held in memory
    for line in f:
        if line.startswith('ATOM'):
            atom_records.append({
                'element': line[76:78].strip(),
                'x': line[30:38].strip(),
                'y': line[38:46].strip(),
                'z': line[46:54].strip(),
            })

# columns= fixes the order needed by the XYZ format and keeps the frame well-formed
# (four named columns, zero rows) when the file contains no ATOM records
atoms = pd.DataFrame(atom_records, columns=['element', 'x', 'y', 'z'])

# XYZ files have the total number of atoms on the first line,
# then a comment line (left blank here), then one line per atom holding
# the element, X, Y, Z as space-delimited values

# e.g.
# 
# 3
# 
# H 0.0 0.0 1.0
# O 0.1 1.0 2.0
# H 1.0 1.0 0.0
#

with open('data/data.xyz', 'w') as f:
    # atom count followed by the blank comment line
    f.write(f'{len(atoms)}\n\n')
    # the rows are written through the already-open handle, so they land after the header
    atoms.to_csv(f, sep=' ', header=False, index=False)

In [3]:
# get the stock prices of AAPL and GOOGL and write the indexed values of the 'close' column (i.e. when time=0, price=100) 
# since 2020 to a file; the file should have columns: date, aapl, googl

aapl = pd.read_csv('data/302-aapl.csv')
googl = pd.read_csv('data/302-googl.csv')

# use merge to merge the dataframes (inner join: only dates present in both files are kept)
data = pd.merge(
    aapl[['date', 'close']], 
    googl[['date', 'close']],
    on='date'
)

# rename columns (merge produced date, close_x, close_y)
data.columns = ['date', 'aapl', 'googl']

# keep only rows since 2020 and put them in chronological order, so that the first row
# really is "time=0"; the parsed dates live in a temporary column and the original
# 'date' text is what gets written out
# utc=True gives one consistent timezone even if the date strings carry UTC offsets
data = (
    data.assign(_when=pd.to_datetime(data['date'], utc=True))
        .loc[lambda frame: frame['_when'] >= pd.Timestamp('2020-01-01', tz='UTC')]
        .sort_values('_when')
        .drop(columns='_when')
        .reset_index(drop=True)
)
if data.empty:
    raise ValueError('no dates since 2020 are present in both price files')

# rescale each series so that its first (earliest) value is 100
for ticker in ('aapl', 'googl'):
    data[ticker] = 100 * data[ticker] / data[ticker].iloc[0]

# index=False: the output must contain exactly the columns date, aapl, googl
data.to_csv('data/out-stocks.csv', index=False)

In [4]:
# use the example in 301-tasks to get the data from https://fantasy.premierleague.com/api/bootstrap-static/
# create a dataframe from a web-based API
# (json and urlopen are imported in the first cell together with pandas)
# the with-block closes the HTTP connection once the body has been read, and the
# timeout stops the cell from hanging forever if the server does not answer
with urlopen("https://fantasy.premierleague.com/api/bootstrap-static/", timeout=30) as response:
    string = response.read()
data = json.loads(string)

# get team name from ID
teams = pd.DataFrame(data['teams'])
# pair the two columns row by row; unlike looking a name up with a running counter,
# this does not depend on what the frame's index labels are
team_map = dict(zip(teams['id'], teams['name']))
print(team_map)

# the data for the players is stored in data['elements'] -> use this to form a new dataframe
players = pd.DataFrame(data['elements'])

# loop over all teams and write their players to a CSV file
for team_id, name in team_map.items():
    # the 'team' column of a player holds the id of the team they belong to
    team = players[players['team'] == team_id]
    print(name)
    print(team.head())
    # index=False: the index is only the player's row position in the full list,
    # not a field of the data
    team.to_csv(f'data/{name}.csv', index=False)
    

{1: 'Arsenal', 2: 'Aston Villa', 3: 'Bournemouth', 4: 'Brentford', 5: 'Brighton', 6: 'Chelsea', 7: 'Crystal Palace', 8: 'Everton', 9: 'Fulham', 10: 'Ipswich', 11: 'Leicester', 12: 'Liverpool', 13: 'Man City', 14: 'Man Utd', 15: 'Newcastle', 16: "Nott'm Forest", 17: 'Southampton', 18: 'Spurs', 19: 'West Ham', 20: 'Wolves'}
Arsenal
   chance_of_playing_next_round  chance_of_playing_this_round    code  \
0                           0.0                           0.0  438098   
1                         100.0                         100.0  205651   
2                          75.0                           NaN  226597   
3                         100.0                         100.0  219847   
4                           0.0                           0.0  463748   

   cost_change_event  cost_change_event_fall  cost_change_start  \
0                  0                       0                 -1   
1                  0                       0                 -2   
2                  0        