In [1]:
import requests
import pandas as pd
import yfinance as yf
import gradio as gr
import pickle
from datetime import datetime, timedelta
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score

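# Define your API keys and URLs before calling the functions below. They expect
# these module-level names: ALPHA_VANTAGE_API_KEY, ALPHA_VANTAGE_API_URL,
# NINJA_API_KEY, NINJA_API_URL, OPENAI_API_KEY, OPENAI_API_URL.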

# Fetch earnings data from Alpha Vantage API
def fetch_earnings_data(ticker):
    """Fetch quarterly earnings records for ``ticker`` from Alpha Vantage.

    Args:
        ticker: Stock symbol sent as the ``symbol`` query parameter.

    Returns:
        The list stored under ``"quarterlyEarnings"`` in the JSON response.
        An empty list is returned when the request fails, the response is not
        a JSON object, or the key is missing / not a list.
    """
    params = {
        "function": "EARNINGS",
        "symbol": ticker,
        "apikey": ALPHA_VANTAGE_API_KEY
    }
    try:
        # A timeout keeps a stalled connection from blocking the caller forever.
        response = requests.get(ALPHA_VANTAGE_API_URL, params=params, timeout=30)
        response.raise_for_status()
        data = response.json()
    except Exception as e:
        # Report the problem but return an empty list: a non-empty error string
        # would be truthy and would be mistaken for earnings records by code
        # that only checks `if not earnings_data`.
        print(f"Error fetching earnings data: {e}")
        return []

    if not isinstance(data, dict):
        return []
    records = data.get("quarterlyEarnings", [])
    return records if isinstance(records, list) else []

# Extract earnings for the specified period (year and quarter)
def extract_earnings_for_period(data, year, quarter):
    """Return the earnings entry whose fiscal period matches ``year``/``quarter``.

    The quarter of a record is derived from the calendar month of its
    ``fiscalDateEnding`` (months 1-3 -> 1, 4-6 -> 2, ...).

    Args:
        data: Iterable of earnings records (dicts with ``fiscalDateEnding`` in
            ``YYYY-MM-DD`` form plus optional ``reportedEPS``/``estimatedEPS``).
        year: Target year; anything ``int()`` accepts.
        quarter: Target quarter; anything ``int()`` accepts.

    Returns:
        A dict with ``earningsCallDate`` (fiscal period end + 30 days, formatted
        ``YYYY-MM-DD``), ``reportedEPS`` and ``estimatedEPS`` for the first
        matching record, or ``None`` when nothing matches or ``year``/``quarter``
        cannot be converted to integers.
    """
    # Convert once, up front; free-text input that is not a number means there
    # is no period to look for.
    try:
        target_year, target_quarter = int(year), int(quarter)
    except (TypeError, ValueError):
        return None

    for record in data or []:
        try:
            fiscal_date = datetime.strptime(record["fiscalDateEnding"], '%Y-%m-%d')
        except (KeyError, TypeError, ValueError):
            # Skip entries that are not dict-like or carry a missing/malformed
            # date instead of aborting the whole scan.
            continue

        fiscal_quarter = (fiscal_date.month - 1) // 3 + 1
        if fiscal_date.year == target_year and fiscal_quarter == target_quarter:
            # The call date is estimated as 30 days after the fiscal period end.
            earnings_call_date = fiscal_date + timedelta(days=30)
            return {
                'earningsCallDate': earnings_call_date.strftime('%Y-%m-%d'),
                'reportedEPS': record.get("reportedEPS"),
                'estimatedEPS': record.get("estimatedEPS")
            }
    return None

# Fetch the earnings call transcript from the Ninja API (returns None on any failure)
def fetch_transcript(ticker, year, quarter):
    """Request the earnings call transcript for a ticker and fiscal period.

    Sends a GET to ``NINJA_API_URL`` with the API key in the ``X-Api-Key``
    header and ``ticker``/``year``/``quarter`` as query parameters.

    Returns:
        The value stored under ``"transcript"`` in the JSON response, or
        ``None`` if that key is absent or anything goes wrong with the request.
    """
    auth_headers = {"X-Api-Key": NINJA_API_KEY}
    query = {"ticker": ticker, "year": year, "quarter": quarter}
    try:
        reply = requests.get(NINJA_API_URL, headers=auth_headers, params=query)
        reply.raise_for_status()
        body = reply.json()
        return body.get("transcript")
    except Exception:
        # Best effort: any failure is reported as "no transcript".
        return None

# Analyze sentiment using OpenAI
def analyze_sentiment_with_openai(text):
    """Classify the sentiment of ``text`` with the OpenAI chat completions API.

    The model is instructed to answer with a single label and the reply is
    normalised, so the result can be used directly as a lookup key.

    Args:
        text: The text to classify (e.g. an earnings call transcript).

    Returns:
        ``"positive"``, ``"neutral"`` or ``"negative"`` when one of those words
        appears in the model's reply (the earliest one wins), otherwise
        ``"Unknown"``. ``"Unknown"`` is also returned when the request fails or
        the response does not have the expected structure.
    """
    headers = {"Authorization": f"Bearer {OPENAI_API_KEY}", "Content-Type": "application/json"}
    labels = ("positive", "neutral", "negative")
    try:
        payload = {
            "model": "gpt-3.5-turbo",
            "messages": [
                {
                    "role": "system",
                    # Constrain the reply to one label; a free-form analysis
                    # would never match the label names used downstream.
                    "content": (
                        "You are a sentiment analysis assistant. "
                        "Reply with exactly one word: positive, neutral, or negative."
                    ),
                },
                {"role": "user", "content": f"Analyze sentiment: {text}"}
            ]
        }
        response = requests.post(OPENAI_API_URL, headers=headers, json=payload, timeout=60)
        response.raise_for_status()
        result = response.json()
        reply = result['choices'][0]['message']['content'].strip().lower()
    except Exception:
        # Best effort: network, HTTP and response-shape problems all map to "Unknown".
        return "Unknown"

    # Tolerate extra words or punctuation by picking the label that occurs first.
    hits = [(reply.find(label), label) for label in labels if label in reply]
    if not hits:
        return "Unknown"
    return min(hits)[1]

# Calculate growth between two dates
def calculate_growth(stock_data, start_date, end_date):
    """Return the percentage change in ``Close`` between two dates.

    Each date is resolved to the row of ``stock_data`` whose index entry is
    nearest to it, so the dates do not have to be present in the index.

    Args:
        stock_data: DataFrame with a ``"Close"`` column.
        start_date: Date at which the baseline price is taken.
        end_date: Date at which the final price is taken.

    Returns:
        ``(end - start) / start * 100`` as a float, or ``None`` if any step of
        the lookup or the calculation raises.
    """
    try:
        row_positions = [
            stock_data.index.get_indexer([when], method='nearest')[0]
            for when in (start_date, end_date)
        ]
        first_close, last_close = [
            float(stock_data.iloc[pos]["Close"]) for pos in row_positions
        ]
        change = last_close - first_close
        return (change / first_close) * 100
    except Exception:
        return None

# Prepare the feature vector for prediction
def prepare_features(ticker, year, quarter):
    """Build the model feature vector for one ticker and fiscal period.

    Combines the period's reported/estimated EPS, a sentiment score for the
    earnings call transcript, and the stock's price change around the
    estimated earnings call date.

    Args:
        ticker: Stock symbol.
        year: Fiscal year of the earnings period.
        quarter: Fiscal quarter of the earnings period.

    Returns:
        ``(features, growth_7_days)`` where ``features`` is a 9-element list
        ``[sentiment, reportedEPS, estimatedEPS, growth_7_days, 0, 0, 0, 0, 0]``,
        or ``(None, None)`` when earnings data, EPS values or price data are
        unavailable.
    """
    earnings_data = fetch_earnings_data(ticker)
    # Anything other than a non-empty list (for example an error message)
    # means there are no usable earnings records.
    if not earnings_data or not isinstance(earnings_data, list):
        return None, None  # No earnings data found

    earnings_info = extract_earnings_for_period(earnings_data, year, quarter)
    if not earnings_info:
        return None, None  # No earnings info found

    # Validate the EPS values before spending API calls on the transcript and
    # sentiment; missing or non-numeric values cannot be turned into features.
    try:
        reported_eps = float(earnings_info['reportedEPS'])
        estimated_eps = float(earnings_info['estimatedEPS'])
    except (TypeError, ValueError):
        return None, None  # EPS values missing or not numeric

    # Fetch transcript and analyze sentiment
    transcript = fetch_transcript(ticker, year, quarter)
    sentiment = "neutral"
    if transcript:
        sentiment = analyze_sentiment_with_openai(transcript)

    # Fetch stock data for the growth calculation
    earnings_call_date = datetime.strptime(earnings_info['earningsCallDate'], '%Y-%m-%d')
    stock_data = yf.download(
        ticker,
        start=earnings_call_date - timedelta(days=10),
        end=earnings_call_date + timedelta(days=30),
    )

    growth_7_days = calculate_growth(
        stock_data,
        earnings_call_date - timedelta(days=7),
        earnings_call_date + timedelta(days=7),
    )

    if growth_7_days is None:
        return None, None  # No stock price data available

    sentiment_map = {"positive": 1, "neutral": 0, "negative": -1}
    # Normalise case/whitespace so replies such as "Positive" still map to a
    # score; anything unrecognised counts as neutral (0).
    sentiment_score = sentiment_map.get(str(sentiment).strip().lower(), 0)

    # NOTE(review): growth_7_days is both a feature and the second return
    # value, which train_model uses as the target, so the model sees its own
    # label. It is kept here so the vector stays compatible with already saved
    # models; revisit before relying on the reported metrics.
    features = [
        sentiment_score,  # Sentiment
        reported_eps,
        estimated_eps,
        growth_7_days,
        # Placeholder for other growth features, replace with actual data if needed
        0,  # Placeholder for 14_day_growth
        0,  # Placeholder for 30_day_growth
        0,  # Placeholder for EPS_Growth
        0,  # Placeholder for other features if any
        0   # Placeholder for any additional features you may have
    ]

    return features, growth_7_days

# Define the prediction model pipeline
# Two-stage estimator shared by training and evaluation: features are
# standardised first, then passed to a 100-tree random forest regressor with a
# fixed seed.
pipeline = Pipeline(steps=[
    ('scaler', StandardScaler()),
    ('model', RandomForestRegressor(random_state=42, n_estimators=100)),
])

# Train the model before predicting
def train_model():
    """Fit the module-level ``pipeline`` and save it to disk.

    Reads ``final_combined_data.csv``, builds one feature vector per row via
    ``prepare_features`` (rows that yield no features are skipped), holds out
    20% of the samples for evaluation, prints MSE / MAE / R-squared for the
    held-out set, and pickles the fitted pipeline to
    ``best_model_pipeline.pkl``.
    """
    dataset = pd.read_csv('final_combined_data.csv')  # Assuming your enhanced data is here
    samples = []
    labels = []

    for _, record in dataset.iterrows():
        year, quarter = record['year'], record['quarter']
        symbol = record['Ticker']
        vector, observed_growth = prepare_features(symbol, year, quarter)
        if not vector:
            continue
        samples.append(vector)
        labels.append(observed_growth)

    X_train, X_test, y_train, y_test = train_test_split(
        samples, labels, test_size=0.2, random_state=42
    )
    pipeline.fit(X_train, y_train)

    # Evaluate the model on the held-out split
    predictions = pipeline.predict(X_test)
    mse_value = mean_squared_error(y_test, predictions)
    mae_value = mean_absolute_error(y_test, predictions)
    r2_value = r2_score(y_test, predictions)

    print(f"Mean Squared Error (MSE): {mse_value}")
    print(f"Mean Absolute Error (MAE): {mae_value}")
    print(f"R-squared (R²): {r2_value}")

    # Persist the fitted pipeline for later predictions
    with open('best_model_pipeline.pkl', 'wb') as model_file:
        pickle.dump(pipeline, model_file)

# Function to predict growth based on input
def predict_growth(ticker, year, quarter):
    """Predict the 7-day growth for a ticker and fiscal period.

    Intended as the Gradio callback, so every outcome (including invalid input
    and a missing model file) is reported as a message string rather than an
    exception.

    Args:
        ticker: Stock symbol; surrounding whitespace is ignored.
        year: Fiscal year; anything ``int()`` accepts.
        quarter: Fiscal quarter, 1-4; anything ``int()`` accepts.

    Returns:
        ``"Predicted 7-day growth: <value>%"`` on success, otherwise a message
        describing why no prediction could be made.
    """
    symbol = ticker.strip() if isinstance(ticker, str) else ticker
    if not symbol:
        return "Error: please enter a stock ticker."

    # Free-text input must be validated here; otherwise a non-numeric value
    # surfaces later as an uncaught exception.
    try:
        year_number = int(year)
        quarter_number = int(quarter)
    except (TypeError, ValueError):
        year_number = quarter_number = None
    if year_number is None or not 1 <= quarter_number <= 4:
        return "Error: year must be a whole number and quarter must be between 1 and 4."

    features, _ = prepare_features(symbol, year_number, quarter_number)
    if not features:
        return "Error making prediction."

    # The saved pipeline is re-read on every call.
    # NOTE: unpickling can execute arbitrary code; only load a file that was
    # produced by train_model in a trusted location.
    try:
        with open('best_model_pipeline.pkl', 'rb') as f:
            model = pickle.load(f)
    except FileNotFoundError:
        return "Error: no trained model found. Run train_model() first."

    prediction = model.predict([features])
    return f"Predicted 7-day growth: {prediction[0]:.2f}%"

# Gradio Interface for user input
# Input widgets, in the same order as predict_growth's parameters.
_ticker_input = gr.Textbox(label="Stock Ticker (e.g., AAPL)", placeholder="Enter the stock ticker")
_year_input = gr.Textbox(label="Year (e.g., 2023)", placeholder="Enter the year of analysis")
_quarter_input = gr.Textbox(label="Quarter (1-4)", placeholder="Enter the quarter number")

# Single text field that shows the string returned by predict_growth.
_growth_output = gr.Textbox(label="Predicted 7-Day Growth")

iface = gr.Interface(
    fn=predict_growth,
    inputs=[_ticker_input, _year_input, _quarter_input],
    outputs=[_growth_output],
    title="Stock Growth Prediction",
    description="Predict 7-day stock growth based on earnings data, sentiment analysis, and financial information.",
)

# Start the app; share=True also requests a temporary public URL.
iface.launch(share=True)


Running on local URL:  http://127.0.0.1:7861
Running on public URL: https://51299e2dfeb55d01c6.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from Terminal to deploy to Spaces (https://huggingface.co/spaces)




[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
