In [None]:
import time
import requests
import json

def fetch_schedule():
    schedule_url = "https://statsapi.mlb.com/api/v1/schedule?sportId=1&season=2024"
    response = requests.get(schedule_url)
    return response.json()

def detect_updates(current_data, new_data):
    if current_data != new_data:
        print("New data detected!")
        # Process the updated data here
        return new_data
    return current_data

# Polling loop
def start_polling(interval=60):  # Check every 60 seconds
    print("Starting polling...")
    current_data = fetch_schedule()

    while True:
        time.sleep(interval)
        try:
            new_data = fetch_schedule()
            new_data['new_data'] = "New Data"
            current_data = detect_updates(current_data, new_data)
        except Exception as e:
            print(f"Error fetching data: {e}")

start_polling()


Starting polling...
New data detected!


In [36]:
from utils import process_endpoint_url
import pandas as pd
import json
import requests

class DataIngestion:
    def __init__(self):
        pass

    def initiate_data_ingestion(self):
        game_pk = self.latest_completed_game()
        game_data = self.get_single_game_data(game_pk)
        return game_data

    def latest_completed_game(self):
        schedule_endpoint_url = 'https://statsapi.mlb.com/api/v1/schedule?sportId=1&season=2024'
        
        # Fetch schedule data
        schedule_dates = process_endpoint_url(schedule_endpoint_url, "dates")

        # Normalize games data into a DataFrame
        games = pd.json_normalize(
            schedule_dates.explode('games').reset_index(drop=True)['games']
        )

        date_columns = [
            "gameDate",
            "officialDate",
            "rescheduleDate",
            "rescheduleGameDate",
            "rescheduledFromDate",
            "resumeDate",
            "resumeGameDate",
            "resumedFromDate"
        ]

        # Convert the specified columns to datetime
        for col in date_columns:
            games[col] = pd.to_datetime(games[col], errors='coerce')

        # Filter for completed games
        completed_games = games[
            games['status.detailedState'].isin(['Final', 'Completed Early'])
        ]

        # Get the most recent completed game
        completed_games = completed_games.sort_values(by='gameDate', ascending=False)
        latest_game = completed_games.iloc[0]

        return latest_game['gamePk']
    
    
    def get_single_game_data(self, game_pk):
        single_game_feed_url = f'https://statsapi.mlb.com/api/v1.1/game/{game_pk}/feed/live'

        single_game_info_json = json.loads(requests.get(single_game_feed_url).content)

        return single_game_info_json
    
    def get_games_between_dates(self, start_date, end_date):
        """
        Fetches all games and filters them between the specified start and end dates.

        Args:
            start_date (str): The start date in the format 'YYYY-MM-DD'.
            end_date (str): The end date in the format 'YYYY-MM-DD'.

        Returns:
            DataFrame: A DataFrame containing games between the specified dates.
        """
        schedule_endpoint_url = 'https://statsapi.mlb.com/api/v1/schedule?sportId=1&season=2024'

        # Fetch all schedule data
        schedule_dates = process_endpoint_url(schedule_endpoint_url, "dates")

        # Normalize games data into a DataFrame
        games = pd.json_normalize(
            schedule_dates.explode('games').reset_index(drop=True)['games']
        )

        date_columns = [
            "gameDate",
            "officialDate",
            "rescheduleDate",
            "rescheduleGameDate",
            "rescheduledFromDate",
            "resumeDate",
            "resumeGameDate",
            "resumedFromDate"
        ]

        # Convert relevant columns to datetime
        for col in date_columns:
            games[col] = pd.to_datetime(games[col], errors='coerce')

        # Filter games by 'gameDate' within the specified range
        start_date = pd.to_datetime(start_date).tz_localize('UTC')
        end_date = pd.to_datetime(end_date).tz_localize('UTC')

        filtered_games = games[
            (games['gameDate'] >= start_date) & (games['gameDate'] <= end_date)
        ]

        return filtered_games

    

data_ingestion = DataIngestion()

# Get games between two dates
start_date = '2024-04-01'
end_date = '2024-04-04'
games_between_dates = data_ingestion.get_games_between_dates(start_date, end_date)




In [35]:
start_date = pd.to_datetime(start_date).tz_localize('UTC')
end_date = pd.to_datetime(end_date).tz_localize('UTC')
print(start_date)


2024-04-01 00:00:00+00:00


In [39]:
data_ingestion = DataIngestion()
game_data = data_ingestion.initiate_data_ingestion()


In [44]:
print((game_data["gameData"]["datetime"]["dateTime"]))

2024-10-31T00:08:00Z


In [43]:
print(pd.to_datetime(game_data["gameData"]["datetime"]["dateTime"]).tz_convert('UTC'))

<class 'pandas._libs.tslibs.timestamps.Timestamp'>


In [7]:
from google import genai
from dotenv import load_dotenv
import os

load_dotenv()
API_KEY = os.getenv("API_KEY")

def generate_text(prompt):
    client = genai.Client(api_key=API_KEY, http_options={'api_version': 'v1alpha'})
    text = ""
    for chunk in client.models.generate_content_stream(
        model='gemini-2.0-flash-thinking-exp', contents=prompt
    ):
        for part in chunk.candidates[0].content.parts:
            if not part.thought:
                text += part.text
    return text

generate_text("What is genAI")

'**GenAI, short for Generative AI, refers to a category of artificial intelligence algorithms and models that are designed to *generate* new content, rather than simply analyzing or acting upon existing data.**\n\nThink of traditional AI as being good at tasks like:\n\n* **Classification:** Identifying if an image is a cat or a dog.\n* **Prediction:**  Predicting stock prices based on historical data.\n* **Automation:**  Automating repetitive tasks based on predefined rules.\n\n**GenAI, on the other hand, is about creation.** It\'s about building AI that can produce things that resemble human creativity and output, such as:\n\n* **Text:** Writing articles, poems, code, scripts, emails, chat responses, and more.\n* **Images:** Creating realistic photos, artwork, illustrations, logos, and even manipulating existing images.\n* **Audio:** Generating music, speech, sound effects, and voiceovers.\n* **Video:** Producing animations, short clips, and even attempting to create longer video cont

In [36]:
from datetime import datetime

date_str = "2024-10-31T00:08:00Z"
date_obj = datetime.strptime(date_str, "%Y-%m-%dT%H:%M:%SZ")  # Parse full datetime
date_only = date_obj.date()  # Extract only the date part

date_only

datetime.date(2024, 10, 31)

In [37]:
start = "2024-10-31"
datetime.strptime(start, "%Y-%m-%d").date()


datetime.date(2024, 10, 31)

In [38]:
date_only == start

False

In [23]:
start = "2024-10-31"
datetime.strptime(start, "%Y-%m-%d").replace(tzinfo=timezone.utc).date()

datetime.date(2024, 10, 31)