# In [None]:
import yfinance as yf
import pandas as pd
import numpy as np
from datetime import datetime, timedelta
from sklearn.linear_model import Lasso
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, r2_score

from sklearn.impute import SimpleImputer

def prepare_and_train_model(input_filename):
    """Train a Lasso regression predicting 'EWJ.Price.Percent.Today'.

    Reads the CSV, keeps only numeric columns, discards rows whose target is
    missing, splits into train/test sets, mean-imputes and standardises the
    features using statistics from the training rows only, fits
    ``Lasso(alpha=0.1)`` and prints the test-set metrics and coefficients.

    Args:
        input_filename: Path (or file-like object) accepted by
            ``pandas.read_csv``.

    Returns:
        A ``(model, scaler, feature_columns)`` tuple, or
        ``(None, None, None)`` when the target column is absent, non-numeric
        or has no values at all.
    """
    target_column = 'EWJ.Price.Percent.Today'

    # Read the data
    data = pd.read_csv(input_filename)

    # Ensure the target column exists
    if target_column not in data.columns:
        print("Error: 'EWJ.Price.Percent.Today' column not found. Run previous analysis first.")
        return None, None, None

    # Identify non-numeric columns
    non_numeric_columns = data.select_dtypes(exclude=[np.number]).columns
    print(f"Non-numeric columns being dropped: {non_numeric_columns}")

    # Select only numeric data for imputation and scaling
    numeric_data = data.select_dtypes(include=[np.number])

    # The target can exist in the file yet be filtered out as non-numeric, or
    # hold no values; report that through the error path instead of failing
    # later with a KeyError.
    if target_column not in numeric_data.columns or numeric_data[target_column].isna().all():
        print(f"Error: '{target_column}' must be numeric and contain at least one value.")
        return None, None, None

    # Rows without a target carry no label; drop them rather than inventing
    # one through mean imputation.
    labelled = numeric_data.dropna(subset=[target_column])

    # Prepare features and target (use only numeric columns)
    X = labelled.drop(columns=[target_column])
    y = labelled[target_column]

    # Split before any fitting so the held-out rows cannot influence the
    # imputation or scaling statistics.
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

    # Features with no observed value in the training rows cannot be imputed
    X_train = X_train.dropna(axis=1, how='all')
    feature_names = X_train.columns
    X_test = X_test[feature_names]

    # Impute missing feature values with the training-set column mean.
    # Results are wrapped back into DataFrames so the scaler is fitted with
    # feature names, as it is later called with a DataFrame.
    imputer = SimpleImputer(strategy='mean')
    X_train_imputed = pd.DataFrame(
        imputer.fit_transform(X_train), columns=feature_names, index=X_train.index
    )
    X_test_imputed = pd.DataFrame(
        imputer.transform(X_test), columns=feature_names, index=X_test.index
    )

    # Scale the features
    scaler = StandardScaler()
    X_train_scaled = scaler.fit_transform(X_train_imputed)
    X_test_scaled = scaler.transform(X_test_imputed)

    # Train Lasso Regression
    lasso = Lasso(alpha=0.1)
    lasso.fit(X_train_scaled, y_train)

    # Predict and evaluate
    y_pred = lasso.predict(X_test_scaled)
    mse = mean_squared_error(y_test, y_pred)
    r2 = r2_score(y_test, y_pred)

    print("Model Performance:")
    print(f"Mean Squared Error: {mse}")
    print(f"R-squared Score: {r2}")

    # Print feature coefficients
    print("\nFeature Coefficients:")
    for feature, coef in zip(feature_names, lasso.coef_):
        print(f"{feature}: {coef}")

    return lasso, scaler, feature_names

def get_user_input(feature_columns):
    """Prompt on stdin for one finite numeric value per feature.

    Each prompt is repeated until the entry parses as a float and is finite.
    ``float()`` also accepts spellings such as "nan" and "inf"; those are
    rejected here so they are not handed on to scaling and prediction.

    Args:
        feature_columns: Iterable of feature names to ask for, in order.

    Returns:
        A one-row ``pandas.DataFrame`` with one column per feature.
    """
    user_data = {}
    print("\nPlease enter values for the following features:")
    for col in feature_columns:
        while True:
            try:
                value = float(input(f"{col}: "))
            except ValueError:
                print("Please enter a valid numeric value.")
                continue
            if not np.isfinite(value):
                # Parsed as a float, but NaN/infinity is not a usable feature value
                print("Please enter a valid numeric value.")
                continue
            user_data[col] = value
            break

    return pd.DataFrame([user_data])
def predict_stock_movement(model, scaler, feature_columns, input_data):
    """Predict the percentage change and its direction for one observation.

    Args:
        model: Fitted estimator exposing ``predict``.
        scaler: Fitted transformer exposing ``transform``.
        feature_columns: Feature names in the order used for training. When
            ``input_data`` is a DataFrame it is restricted to, and ordered by,
            these columns before scaling. May be ``None`` to skip that step.
        input_data: Feature values for the observation; only the first
            prediction is used.

    Returns:
        ``(predicted_percent_change, movement)`` where ``movement`` is "Up"
        for a strictly positive prediction and "Down" otherwise.

    Raises:
        KeyError: If ``input_data`` is a DataFrame lacking a required column.
    """
    # Align DataFrame input with the training layout so reordered or extra
    # columns cannot be scaled against the wrong feature.
    if feature_columns is not None and isinstance(input_data, pd.DataFrame):
        input_data = input_data[list(feature_columns)]

    # Scale the input data
    input_scaled = scaler.transform(input_data)

    # Predict percentage change
    predicted_percent_change = model.predict(input_scaled)[0]

    # Determine movement direction (a prediction of exactly 0 counts as "Down")
    movement = "Up" if predicted_percent_change > 0 else "Down"

    return predicted_percent_change, movement

def main(input_filename, output_filename):
    """Train the model, prompt for feature values, then save and print a prediction.

    Args:
        input_filename: CSV file passed to ``prepare_and_train_model``.
        output_filename: Destination CSV for the entered features plus the
            prediction columns.

    Returns:
        None. Exits early, writing nothing, when training yields no model.
    """
    trained = prepare_and_train_model(input_filename)
    model, scaler, feature_columns = trained

    # Training reports its own error; there is nothing to predict with
    if model is None:
        return

    # Collect one observation from the user and score it
    features = get_user_input(feature_columns)
    pred_percent_change, movement = predict_stock_movement(
        model, scaler, feature_columns, features
    )

    # Append the prediction to the entered features and persist the row
    report = features.assign(
        **{
            'Predicted.Percent.Change': pred_percent_change,
            'Movement.Direction': movement,
        }
    )
    report.to_csv(output_filename, index=False)

    summary_lines = (
        "\nPrediction Results:",
        f"Predicted Percentage Change: {pred_percent_change:.4f}%",
        f"Movement Direction: {movement}",
    )
    for line in summary_lines:
        print(line)

# Example run: train on the engineered input file and write the prediction CSV
main(input_filename='input.engineered.csv', output_filename='price_output.csv')

