## Week 4: Reading Files and Importing Modules

By the end of this week, you will be able to:
- Read .txt, .json, and .csv files and use their contents
- Use advanced string manipulations to extract relevant data
- Import modules and work with dates

#### Importing Modules
- Using standard library modules like `math`, `datetime`
- Examples of using functions and classes imported from modules

#### Datetime Operations

#### TXT, JSON and CSV files
- Reading and writing TXT, JSON and CSV files
- Examples of basic CSV read and write functions for player statistics and match results

#### Advanced String Manipulations
- Accessing substrings to find specific player names or statistics
- Concatenating strings to form full match summaries and other string manipulations
- Using `.split()` and `str.join` functions to manipulate player and match data

In [None]:
# Load the whole soccer statistics file into one string, then echo it
with open('soccer_stats.txt') as file:
    content = file.read()
print(content)

# Walk the same file one line at a time, trimming surrounding whitespace from each line
with open('soccer_stats.txt') as file:
    for line in file:
        print(line.strip())

# Save a one-line match summary ('w' creates the file or overwrites an existing one)
with open('soccer_summary.txt', mode='w') as file:
    file.write('Final Score: Team A 2 - 1 Team B')

In [None]:
import math
import datetime
from datetime import timedelta

# math.ceil rounds the goals-per-game ratio UP to the nearest whole number (16 / 20 -> 1)
goals, games = 16, 20
goal_ratio = math.ceil(goals / games)
print(goal_ratio)

# datetime.date(year, month, day) builds the calendar date of the next match
next_match = datetime.date(year=2023, month=8, day=9)
print(f'Next match date is: {next_match}')

In [None]:
# Snapshot of the current local date and time (changes on every run)
currentDate = datetime.datetime.now()

# .date() drops the time-of-day part and keeps only year/month/day
currentDate.date()

# Adding a timedelta to a date shifts it: this is the date three days from today
currentDate.date() + timedelta(days=3)

# A fixed reference point: 1 September 2023 at 12:00:00
oldDate = datetime.datetime(2023, 9, 1, 12, 0, 0)

# Subtracting two datetimes gives a timedelta; dividing it by a one-day
# timedelta expresses the gap as a (fractional) number of days
(currentDate - oldDate) / timedelta(days=1)



In [None]:
# Open the match stream for reading (read mode is the default for open())
with open("matchStream.txt") as fileValue:
    # First pass: pull the whole file into a single string and show it
    content = fileValue.read()
    print("Full content of the file:", content, sep="\n")

    # read() left the cursor at the end of the file, so rewind before the second pass
    fileValue.seek(0)

    # Second pass: go through the file one line at a time
    print("\nReading line by line:")
    for line in fileValue:
        print(line.strip())


In [None]:
line.split(',')

In [None]:
# Build on your reading .json function above, make a new function that takes 
# in a file path name, and outputs the name of each player that has > 20 goals and < 3 
# yellow cards
# If there are none, print 'NO PLAYERS FOUND' at the end

In [None]:
import math
import datetime
from datetime import timedelta

# Compare the exact goals-per-game ratio with its rounded-up value
goals, games = 16, 20
print(goals / games)  # exact ratio
goal_ratio = math.ceil(goals / games)  # ceil rounds up to the next whole number
print(goal_ratio)

# # Using datetime to find the next match date
# next_match = datetime.date(2023, 8, 9)
# print(f'Next match date is: {next_match}')


### Structure of modules and packages
### MODULE A
     # Within the module, we have:
     # def functionX
     # def functionY
     # def functionZ
# If we want to use functionZ for our program, we would say
# from MODULEA import functionZ

In [None]:
import math

# With "import math", every function is reached through the module name: math.ceil(...)
goals, games = 16, 20
print(goals / games)
goal_ratio = math.ceil(goals / games)
print(goal_ratio)

# IS THE SAME AS:

In [None]:
from math import ceil, sqrt

# With "from math import ceil, sqrt", the functions are called by their bare names
goals, games = 16, 20
print(goals / games)
goal_ratio = ceil(goals / games)
print(goal_ratio)

# Last expression in the cell, so the notebook displays its value
sqrt(0.25)

In [None]:
# We've thought about different data types and data structures
# Data types: Float, int, string, boolean
# Data Structures: Lists, dictionaries

# Now... we also have the "date" and "datetime" data types

In [None]:
import datetime
from datetime import timedelta

# Dot syntax: something.something_else looks up a member of "something"
# (here: the "date" class that lives inside the "datetime" module)

# Build the date of the next match from its year, month and day
next_match = datetime.date(year=2023, month=8, day=9)
print(f'Next match date is: {next_match}')
next_match

In [None]:
# Making a datetime object
last_match_time = datetime.datetime(2023, 8, 9, 12, 15, 7)

In [None]:
print(last_match_time)
last_match_time

In [None]:
print(f'Last match time is: {last_match_time}')

In [None]:
last_match_time.weekday()   # -> IS A WEDNESDAY
# MON - 0
# TUES - 1
# WED - 2
# THURS - 3
# FRI - 4
# SAT - 5
# SUN - 6

In [None]:
# Quick exercise
# Define a new datetime variable and return its weekday
# Return its hour (try to google to find)

In [None]:
# Exercise solution: a datetime for 21 January 2024 at 16:38 (seconds default to 0)
newday = datetime.datetime(year=2024, month=1, day=21, hour=16, minute=38)
print(newday)

# weekday() is a method counting from Monday = 0; hour is a plain attribute
print(newday.weekday(), newday.hour, sep='\n')

In [None]:
# datetime.datetime object
#    hour
#    minutes
#    seconds
#    year

In [None]:
datetime.datetime.now()

In [None]:
# Module: datetime
    # Class inside that module: datetime (hence "datetime.datetime")
        # The class has a method "now()"

In [None]:
from datetime import timedelta

# Snapshot of the current local date and time (changes on every run)
currentDate = datetime.datetime.now()

# A bare expression in the middle of a cell is evaluated but not displayed
currentDate

# .date() keeps only the year/month/day part
print(currentDate.date())

# Adding a timedelta to a date shifts it: this is the date three days from today
currentDate.date() + timedelta(days=3)

# A fixed reference point: 1 September 2023 at 12:00:00
oldDate = datetime.datetime(2023, 9, 1, 12, 0, 0)

# datetime - datetime gives a timedelta; dividing by a one-day timedelta
# expresses the gap as a (fractional) number of days
(currentDate - oldDate) / timedelta(days=1)



In [None]:
# Two timestamps on 1 September 2023, 32 minutes apart (unset fields default to 0)
oldDate1 = datetime.datetime(year=2023, month=9, day=1, hour=12)
oldDate2 = datetime.datetime(year=2023, month=9, day=1, hour=12, minute=32)

# Dividing the gap by a one-second timedelta expresses it in seconds (as a float)
(oldDate2 - oldDate1) / timedelta(seconds=1)

In [None]:
# Practice problems:

# Specific Date and Time: Create a datetime object for January 1, 2025, at 15:45 hours, and print it out.

# Weekday of a Date: Write a program to find out the weekday (e.g., Monday, Tuesday) of your next birthday.

# Difference Between Two Dates: Calculate the number of days between January 1, 2020, and today.

# Adding Days to a Date: Write a program that adds 100 days to the current date and displays the result.


In [None]:
# Open the file in read mode ('r')
with open("matchStream.txt", "r") as fileValue:
    # The whole-file read and the seek(0) rewind are left commented out below,
    # so this cell performs only the line-by-line pass.
#     content = fileValue.read()
#     print("Full content of the file:")
#     print(content)

#     # Move the file cursor to the beginning of the file
#     fileValue.seek(0)

    # Read the file line by line; enumerate pairs each line with its 0-based index
    print("\nReading line by line:")
    for lineIdx, line in enumerate(fileValue):
        print('Line Index: ', lineIdx)
        # strip() removes the trailing newline (and any other surrounding whitespace)
        print(line.strip())
        # print adds its own newline after '\n', leaving two blank lines between records
        print('\n')
        


In [None]:
line

In [None]:
line.strip()

In [None]:
testStr = 'Man Utd\n'
testStr[:-1]

In [None]:
cleanedLineOfData = line.strip().split(',')

cleanedLineOfData[-1]


In [None]:
# Exercise
# Make a dictionary where each key corresponds to a column in the above data
# Loop through matchStream.txt and add data as you read it into your dictionary

# One list per column; position i across the four lists describes record i of the file.
# Column order used by the indexing below (confirm against matchStream.txt):
#   0 -> action      (can be goal, pass, assist)
#   1 -> player      (player name)
#   2 -> playerTeam  (player team)
#   3 -> oppTeam     (opp. team)
dataDict = {
    'action': [],
    'player': [],
    'playerTeam': [],
    'oppTeam': [],
}

with open("matchStream.txt", "r") as fileValue:
    for line in fileValue:
        cleanedLineOfData = line.strip().split(',')

        # A blank line splits into [''], which has no columns 1-3; skip it
        # instead of crashing with IndexError part-way through a record.
        if cleanedLineOfData == ['']:
            continue

        print(cleanedLineOfData)

        dataDict['action'].append(cleanedLineOfData[0])
        dataDict['player'].append(cleanedLineOfData[1])
        dataDict['playerTeam'].append(cleanedLineOfData[2])
        dataDict['oppTeam'].append(cleanedLineOfData[3])

In [None]:
dataDict

In [None]:
### JUST FUTURE MOTIVATION
import pandas as pd
dataa = pd.DataFrame(dataDict)
dataa

In [None]:
# Build on your reading player_stats.json function above, make a new function that takes 
# in a file path name, and outputs the name of each player that has > 20 goals and < 3 
# yellow cards
# If there are none, print 'NO PLAYERS FOUND' at the end

### SOLUTION:
import json

def player_info(file):
    """Print the players with more than 20 goals and fewer than 3 yellow cards.

    Args:
        file: Path of a JSON file containing a list of player records. Each
            record is read as a dictionary with the keys "name", "goals" and
            "yellow_cards".

    Prints one line per qualifying player, followed by the list of all
    qualifying names. If no player qualifies, prints 'NO PLAYERS FOUND'
    instead. Returns None.

    Raises:
        KeyError: If a record is missing one of the three keys above.
    """
    with open(file, 'r') as f:
        player_stats = json.load(f)

    # The data is fully parsed by now, so the filtering runs after the file is closed.
    players_list = []
    for info in player_stats:
        # Logical `and` (rather than bitwise `&`) is the boolean operator and short-circuits.
        if info["goals"] > 20 and info["yellow_cards"] < 3:
            players_list.append(info["name"])
            print(f'The following player has more than 20 goals but less than 3 yellow cards:{info["name"]}')

    # The exercise asks for an explicit message when nobody qualifies.
    if not players_list:
        print('NO PLAYERS FOUND')
        return

    print('All good players:')
    print(players_list)

In [None]:
player_info('player_stats.json')

In [None]:
with open('player_stats.json', 'r') as f:
    player_stats = json.load(f)

In [None]:
player_stats # --> LIST OF DICTIONARIES

In [None]:
# List of rows [{First Row of Data}, 
#               {Second Row of Data}, 
#               {Third Row of Data} ...]

In [None]:
# HOMEWORK UPDATED FOR 21/1/24

# Build on your reading player_stats.json function above, make a new function that takes 
# in a file path name, and outputs the name of each player that has > 20 goals and < 3 
# yellow cards
# If there are none, print 'NO PLAYERS FOUND' at the end

# Read in match_results.json, containing data on teams, scores, and date of match
# You have two tasks: Return the number of games played on 2024-03-06 as well as the average goal differential
# Report the number of games Manchester United has won, and their most recent win
# You must also report the top 3 teams with the highest win rate

# Read in football_players.csv and report on statistics of the data available
# Give the average number of goals scored and average number of assists per position (Forward, Midfielder, etc)
# Which team has the highest average number of goals scored?
# (OPTIONAL): Is there a relationship between the number of goals scored per player and the number of assists? How could you test this?


# Age Calculator: Write a program that calculates your age in years, months, and days, given your birthdate. (Make a function!)

# Time Difference in Seconds: Calculate the difference in seconds between two datetime objects: datetime(2024, 1, 1, 0, 0) and datetime(2024, 1, 2, 1, 30).

# Last Day of the Month: Write a program to find the last day of the month for any given month and year (e.g., for February 2023).

In [None]:
lmt = datetime.datetime(2023, 8, 9, 12, 15, 7)

In [None]:
import json
import csv

# Parse the player statistics JSON file into Python objects, then show the result
with open('player_stats.json') as f:
    player_stats = json.loads(f.read())
print(player_stats)

In [None]:
player_stats

In [None]:
# Serialize one new player's statistics to JSON text and store it in new_player.json
with open('new_player.json', mode='w') as f:
    f.write(json.dumps({'player_name': 'John', 'goals': 5, 'assists': 7}))

In [None]:
# How do we read our new_player.json?

In [None]:
# Reading a CSV file containing match results.
# newline='' hands line-ending handling to the csv module, as its documentation
# asks for; without it, newlines embedded inside quoted fields are misread.
with open('match_results.csv', 'r', newline='') as file:
    csv_reader = csv.reader(file)
    # Each row comes back as a list of strings, one item per column
    for row in csv_reader:
        print(row)

# Writing to a CSV file to summarize recent match results
# ('w' creates or overwrites the file; the header row goes first, then the data row)
with open('recent_matches.csv', 'w', newline='') as file:
    csv_writer = csv.writer(file)
    csv_writer.writerow(['Date', 'Team A', 'Team B', 'Team A Score', 'Team B Score'])
    csv_writer.writerow(['2023-08-09', 'Leeds United', 'Newcastle United', '2', '3'])

In [None]:
row

In [None]:
# Make a function to read a .csv file 
# The input is the file path name, the output is printing out each row of the .csv
# At the end, return the string "success"

In [None]:
# Slicing: the first four characters of the summary hold the player's name
match_summary = 'John scored 2 goals'
player_name = match_summary[:4]
print(f'Player name extracted: {player_name}')

# str.split cuts the stats string at every ', ' separator and returns the pieces as a list
stats = 'Goals:5, Assists:3, Cards:1'
stats_list = stats.split(sep=', ')
print(f'Stats list: {stats_list}')

# str.join glues a list of words back together, with one space between neighbours
words = ['Final', 'Score:', '3-1']
summary = str.join(' ', words)
print(f'Match Summary: {summary}')

# str.find gives the index where the keyword starts (-1 when the keyword is absent)
position = match_summary.find('scored')
print(f'Keyword position: {position}')

In [None]:
# you are given a list of team names and a target team name
# return the index in the list at which the target team name appears

# Example: Input: listTeamNames = ['Liverpool', 'Arsenal', 'Tottenham', 'Manchester City', 'Newcastle']; targetTeamName = 'Arsenal'
#            Output: 1

# Example: Input: listTeamNames = ['Liverpool', 'Arsenal', 'Tottenham', 'Manchester City', 'Newcastle']; targetTeamName = 'Newcastle'
#            Output: 4