In [37]:
import requests
from datetime import datetime, timedelta
from googleapiclient.discovery import build
from google.oauth2 import service_account
from googleapiclient.http import MediaIoBaseDownload, MediaIoBaseUpload
import io
import pandas as pd

In [38]:
# Get MLB scores from espn
def get_mlb_scores(date):
    """Fetch ESPN's MLB scoreboard for one day and return each game's teams and scores.

    Args:
        date: A ``datetime``-like object. Only ``strftime('%Y%m%d')`` is used,
            to build the ``dates`` query parameter.

    Returns:
        list[dict]: One dict per competition with the keys ``'team1'``,
        ``'team2'``, ``'score1'`` and ``'score2'``. Team names are ESPN's
        ``shortDisplayName`` with 'Diamondbacks' rewritten to 'DBacks';
        scores are passed through exactly as ESPN returns them.

    Raises:
        requests.HTTPError: If ESPN answers with a 4xx/5xx status.
        requests.Timeout: If no response arrives within the timeout.
    """
    base_url = "https://site.api.espn.com/apis/site/v2/sports/baseball/mlb/scoreboard?dates="
    url = base_url + date.strftime('%Y%m%d')

    # Without a timeout a stalled connection would hang the notebook cell forever.
    response = requests.get(url, timeout=30)
    # Surface HTTP errors here instead of failing later on an unexpected payload.
    response.raise_for_status()
    data = response.json()

    # ESPN short names that are rewritten before being returned. The same
    # mapping is applied to both competitors (the original misspelled the
    # name in the second comparison, so it never matched).
    aliases = {'Diamondbacks': 'DBacks'}

    scores = []
    for event in data.get('events', []):
        for competition in event.get('competitions', []):
            competitors = competition['competitors']
            if len(competitors) < 2:
                # A game entry needs two sides; skip anything incomplete.
                continue

            team1_data, team2_data = competitors[0], competitors[1]
            team1 = team1_data['team']['shortDisplayName']
            team2 = team2_data['team']['shortDisplayName']

            scores.append({
                'team1': aliases.get(team1, team1),
                'team2': aliases.get(team2, team2),
                'score1': team1_data['score'],
                'score2': team2_data['score'],
            })

    return scores


In [39]:
# Get NBA scores from espn
def get_nba_scores(date):
    """Fetch ESPN's NBA scoreboard for one day and return each game's teams and scores.

    Args:
        date: A ``datetime``-like object. Only ``strftime('%Y%m%d')`` is used,
            to build the ``dates`` query parameter.

    Returns:
        list[dict]: One dict per competition with the keys ``'team1'``,
        ``'team2'``, ``'score1'`` and ``'score2'``. Team names are ESPN's
        ``shortDisplayName``; scores are passed through exactly as ESPN
        returns them. An empty list is returned when the payload has no events.

    Raises:
        requests.HTTPError: If ESPN answers with a 4xx/5xx status.
        requests.Timeout: If no response arrives within the timeout.
    """
    base_url = "https://site.api.espn.com/apis/site/v2/sports/basketball/nba/scoreboard?dates="
    url = base_url + date.strftime('%Y%m%d')

    # Without a timeout a stalled connection would hang the notebook cell forever.
    response = requests.get(url, timeout=30)
    # Surface HTTP errors here instead of failing later on an unexpected payload.
    response.raise_for_status()
    data = response.json()

    scores = []
    for event in data.get('events', []):
        for competition in event.get('competitions', []):
            competitors = competition['competitors']
            if len(competitors) < 2:
                # A game entry needs two sides; skip anything incomplete.
                continue

            team1_data, team2_data = competitors[0], competitors[1]
            scores.append({
                'team1': team1_data['team']['shortDisplayName'],
                'team2': team2_data['team']['shortDisplayName'],
                'score1': team1_data['score'],
                'score2': team2_data['score'],
            })

    return scores

In [None]:
# Example usage: fetch the MLB scoreboard for a single day and print one line per game.
# Note: get_mlb_scores performs a live HTTP request to ESPN, so this cell needs network access.
selected_date = datetime(2023, 4, 9)
mlb_scores = get_mlb_scores(selected_date)

# Each entry is a dict with 'team1'/'team2' names and 'score1'/'score2' values.
for score in mlb_scores:
    print(f"{score['team1']} {score['score1']} - {score['team2']} {score['score2']}")

In [173]:
# Set up credentials
# The notebook lists, downloads and creates files, so it requests the general Drive scope.
SCOPES = ['https://www.googleapis.com/auth/drive']
# Service-account key file, given as a relative path (resolved against the working directory).
# NOTE(review): make sure this key file is never committed alongside the notebook.
SERVICE_ACCOUNT_FILE = 'service_account.json'

creds = service_account.Credentials.from_service_account_file(
        SERVICE_ACCOUNT_FILE, scopes=SCOPES)

# Set up the Drive API
# `drive_service` is a module-level Drive v3 client; the find/download/upload
# helpers in this notebook read it as a global rather than taking it as an argument.
drive_service = build('drive', 'v3', credentials=creds)

In [8]:
def _find_league_files(folder_id, league):
    """Return every file in ``folder_id`` whose name contains ``league``.

    Only files with the 'application/vnd.ms-excel' MIME type are matched.
    All result pages are followed via ``nextPageToken`` so that folders with
    more files than fit in a single response are returned completely.

    Args:
        folder_id: Google Drive folder ID to search in.
        league: Text the file name must contain (e.g. 'MLB').

    Returns:
        list[dict]: Drive file resources restricted to the ``id`` and ``name``
        fields. Empty when nothing matches (a notice is printed in that case).
    """
    query = f"'{folder_id}' in parents and name contains '{league}' and mimeType = 'application/vnd.ms-excel'"

    items = []
    page_token = None
    while True:
        results = drive_service.files().list(
            q=query,
            fields="nextPageToken, files(id, name)",
            pageToken=page_token,
        ).execute()
        items.extend(results.get("files", []))

        # A missing token means the last page has been read.
        page_token = results.get("nextPageToken")
        if not page_token:
            break

    if not items:
        print("No files found.")
    return items


# Get mlb files from target folder
def find_mlb_files(folder_id):
    """Return the MLB workbooks (``id`` and ``name``) found in ``folder_id``."""
    return _find_league_files(folder_id, 'MLB')


# Get nba files from target folder
def find_nba_files(folder_id):
    """Return the NBA workbooks (``id`` and ``name``) found in ``folder_id``."""
    return _find_league_files(folder_id, 'NBA')

In [43]:
# Download file from drive
def download_excel_file(file_id, buffer):
    """Stream the Drive file ``file_id`` into ``buffer`` chunk by chunk.

    Progress is printed after every chunk. When the download is complete the
    buffer is rewound to position 0 so it can be read straight away.
    Nothing is returned.
    """
    media_request = drive_service.files().get_media(fileId=file_id)
    chunk_reader = MediaIoBaseDownload(buffer, media_request)

    # At least one chunk is always requested; keep going until the
    # downloader stops reporting ``False`` for completion.
    while True:
        progress, finished = chunk_reader.next_chunk()
        print(f"Download progress: {int(progress.progress() * 100)}.")
        if finished is not False:
            break

    buffer.seek(0)

# Upload file to drive
def upload_excel_file(file_name, buffer, folder_id):
    """Create a new Drive file called ``file_name`` inside ``folder_id``.

    The content of ``buffer`` is sent with the 'application/vnd.ms-excel'
    MIME type. The ID of the created file is printed; nothing is returned.
    """
    media_body = MediaIoBaseUpload(buffer, mimetype='application/vnd.ms-excel')

    # Only the new file's ID is requested back from the API.
    create_request = drive_service.files().create(
        body={'name': file_name, 'parents': [folder_id]},
        media_body=media_body,
        fields='id',
    )
    created = create_request.execute()
    print(f"File ID: {created.get('id')}")


In [181]:
def process_mlb_files(folder_id):
    """Add a next-day row and game scores to every MLB workbook in a Drive folder.

    For each file returned by ``find_mlb_files``:
      1. The first sheet is read into a DataFrame.
      2. The row holding the first repeated 'Team' value is copied, its 'Date'
         is advanced by one day, and the copy is inserted directly before the
         original row. If no team repeats, this step is skipped.
      3. A 'scores' column is filled from ``get_mlb_scores`` for the date
         embedded in the file name, matching team names against the first column.
      4. The result is uploaded to the same folder as ``Updated_<original name>``.

    Args:
        folder_id: Google Drive folder ID to read from and upload into.

    Returns:
        list[pandas.DataFrame]: The modified frames, one per processed file.

    Raises:
        ValueError: If a file name does not carry a timestamp at characters
            4-22, or a 'Date' cell does not look like 'Sunday, April 09'.
    """
    mlb_files = find_mlb_files(folder_id)
    data_frames = []

    for file_info in mlb_files:
        file_name = file_info['name']

        # Results are uploaded into this same folder, so a later run may list
        # them again; their names no longer have the timestamp at 4-22.
        if file_name.startswith("Updated_"):
            print(f"Skipping {file_name}")
            continue

        print(f"Processing {file_name}")

        # File names have the format "MLB YYYY-MM-DD HH:MM:SS.xlsx".
        date_string = file_name[4:23]
        date = datetime.strptime(date_string, "%Y-%m-%d %H:%M:%S")

        # Use a buffer instead of a local file path
        file_buffer = io.BytesIO()
        download_excel_file(file_info["id"], file_buffer)
        file_buffer.seek(0)

        # Read the Excel file into a pandas DataFrame
        df = pd.read_excel(file_buffer, sheet_name=0, engine='openpyxl')

        # Find the index of the first duplicate team (if there is one)
        duplicate_rows = df.index[df['Team'].duplicated()]
        if len(duplicate_rows) > 0:
            row_index = duplicate_rows[0]

            # Create a new row with the same data as the specified row
            new_row = df.loc[row_index].copy()

            # The 'Date' cell has no year. Parsing it without one would default
            # to 1900, which yields the wrong weekday after re-formatting and
            # cannot represent February 29. The year is taken from the file
            # name instead.
            # NOTE(review): assumes the row falls in the same calendar year as
            # the file's timestamp - confirm for files created around New Year.
            original_date_str = new_row['Date']
            original_date = datetime.strptime(f"{original_date_str} {date.year}", '%A, %B %d %Y')
            new_date = original_date + timedelta(days=1)
            new_row['Date'] = new_date.strftime('%A, %B %d')

            # Insert the new row into the DataFrame
            df = pd.concat([df.iloc[:row_index], pd.DataFrame(new_row).T, df.iloc[row_index:]]).reset_index(drop=True)
        else:
            print(f"No repeated team in {file_name}; no row inserted.")

        # Get MLB scores
        scores = get_mlb_scores(date)

        # Create a new column named "scores" and initialize it with None
        df['scores'] = None

        # Match each score with the team names in the first column. Names are
        # compared literally (not as regular expressions) and empty cells
        # count as "no match" instead of breaking the boolean mask.
        team_column = df.iloc[:, 0]
        for score in scores:
            df.loc[team_column.str.contains(score['team1'], regex=False, na=False), 'scores'] = score['score1']
            df.loc[team_column.str.contains(score['team2'], regex=False, na=False), 'scores'] = score['score2']

        # Write into a fresh buffer: overwriting the downloaded bytes in place
        # could leave stale trailing data if the new workbook is shorter.
        output_buffer = io.BytesIO()
        df.to_excel(output_buffer, index=False, sheet_name='MLB')
        output_buffer.seek(0)

        # Upload the modified file to the same Google Drive folder
        new_file_name = f"Updated_{file_name}"
        upload_excel_file(new_file_name, output_buffer, folder_id)

        data_frames.append(df)

    return data_frames


In [None]:
def process_nba_files(folder_id):
    """Add a next-day row and game scores to every NBA workbook in a Drive folder.

    For each file returned by ``find_nba_files``:
      1. The first sheet is read into a DataFrame.
      2. The row holding the first repeated 'Team' value is copied, its 'Date'
         is advanced by one day, and the copy is inserted directly before the
         original row. If no team repeats, this step is skipped.
      3. A 'scores' column is filled from ``get_nba_scores`` for the date
         embedded in the file name, matching team names against the first column.
      4. The result is uploaded to the same folder as ``Updated_<original name>``.

    Args:
        folder_id: Google Drive folder ID to read from and upload into.

    Returns:
        list[pandas.DataFrame]: The modified frames, one per processed file.

    Raises:
        ValueError: If a file name does not carry a timestamp at characters
            4-22, or a 'Date' cell does not look like 'Sunday, April 09'.
    """
    nba_files = find_nba_files(folder_id)
    data_frames = []

    for file_info in nba_files:
        file_name = file_info['name']

        # Results are uploaded into this same folder, so a later run may list
        # them again; their names no longer have the timestamp at 4-22.
        if file_name.startswith("Updated_"):
            print(f"Skipping {file_name}")
            continue

        print(f"Processing {file_name}")

        # File names have the format "NBA YYYY-MM-DD HH:MM:SS.xlsx".
        date_string = file_name[4:23]
        date = datetime.strptime(date_string, "%Y-%m-%d %H:%M:%S")

        # Use a buffer instead of a local file path
        file_buffer = io.BytesIO()
        download_excel_file(file_info["id"], file_buffer)
        file_buffer.seek(0)

        # Read the Excel file into a pandas DataFrame
        df = pd.read_excel(file_buffer, sheet_name=0, engine='openpyxl')

        # Find the index of the first duplicate team (if there is one)
        duplicate_rows = df.index[df['Team'].duplicated()]
        if len(duplicate_rows) > 0:
            row_index = duplicate_rows[0]

            # Create a new row with the same data as the specified row
            new_row = df.loc[row_index].copy()

            # The 'Date' cell has no year. Parsing it without one would default
            # to 1900, which yields the wrong weekday after re-formatting and
            # cannot represent February 29. The year is taken from the file
            # name instead.
            # NOTE(review): assumes the row falls in the same calendar year as
            # the file's timestamp - confirm for files created around New Year.
            original_date_str = new_row['Date']
            original_date = datetime.strptime(f"{original_date_str} {date.year}", '%A, %B %d %Y')
            new_date = original_date + timedelta(days=1)
            new_row['Date'] = new_date.strftime('%A, %B %d')

            # Insert the new row into the DataFrame
            df = pd.concat([df.iloc[:row_index], pd.DataFrame(new_row).T, df.iloc[row_index:]]).reset_index(drop=True)
        else:
            print(f"No repeated team in {file_name}; no row inserted.")

        # Get NBA scores
        scores = get_nba_scores(date)

        # Create a new column named "scores" and initialize it with None
        df['scores'] = None

        # Match each score with the team names in the first column. Names are
        # compared literally (not as regular expressions) and empty cells
        # count as "no match" instead of breaking the boolean mask.
        team_column = df.iloc[:, 0]
        for score in scores:
            df.loc[team_column.str.contains(score['team1'], regex=False, na=False), 'scores'] = score['score1']
            df.loc[team_column.str.contains(score['team2'], regex=False, na=False), 'scores'] = score['score2']

        # Write into a fresh buffer: overwriting the downloaded bytes in place
        # could leave stale trailing data if the new workbook is shorter.
        output_buffer = io.BytesIO()
        df.to_excel(output_buffer, index=False, sheet_name='NBA')
        output_buffer.seek(0)

        # Upload the modified file to the same Google Drive folder
        new_file_name = f"Updated_{file_name}"
        upload_excel_file(new_file_name, output_buffer, folder_id)

        data_frames.append(df)

    return data_frames


In [None]:
# Drive folder that holds the MLB/NBA workbooks; processed copies are uploaded back into it.
folder_id = "1dgcJfP0vlBzhLbksWAqmmbXoDW4I27-8"

# Keep each league's result under its own name so the MLB frames are not
# discarded when the NBA run rebinds the variable.
mlb_data_frames = process_mlb_files(folder_id)
nba_data_frames = process_nba_files(folder_id)

# Backward compatibility: `data_frames` previously ended up bound to the NBA result.
data_frames = nba_data_frames