In [None]:
import json
import pandas as pd

# Read the quarterly JSON export from disk.
file_path = "../../Code/Kishan/Json_File/Refined_file/Updated_Filtered_Quarterly_data.json"
with open(file_path, "r") as file:
    data = json.load(file)

# Every key after the first is treated as a parameter; the first key is
# skipped by position (it is expected to be "Quarters").
quarterly_section = data["Quarterly"]
all_parameters = list(quarterly_section.keys())[1:]

# Pool each parameter's values across all companies into one flat list.
param_data = {}
for param in all_parameters:
    pooled = []
    for company_values in quarterly_section[param].values():
        if isinstance(company_values, list):
            # A bare list is taken as-is.
            chunks = [company_values]
        elif isinstance(company_values, dict):
            # A dict contributes each of its values, in insertion order.
            chunks = company_values.values()
        else:
            # Any other shape is ignored.
            continue
        for chunk in chunks:
            pooled.extend(chunk)
    param_data[param] = pooled

# Wrapping each list in a Series lets columns of different lengths coexist;
# shorter columns are padded with NaN.
df = pd.DataFrame({name: pd.Series(values) for name, values in param_data.items()})

# Write the combined table to an Excel workbook without the index column.
df.to_excel("combined_parameters_data.xlsx", index=False)
print("Data extraction complete. Saved as combined_parameters_data.xlsx")


Data extraction complete. Saved as combined_parameters_data.xlsx


In [None]:
import json
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.preprocessing import StandardScaler

# Load the data
file_path = "../../Code/Kishan/Json_File/Refined_file/Updated_Filtered_Quarterly_data.json"
with open(file_path, "r") as file:
    data = json.load(file)

# Get all parameters. The first key is skipped by position; it is expected to
# be "Quarters".
all_parameters = list(data["Quarterly"].keys())[1:]

# Initialize a dictionary to store combined values
param_data = {param: [] for param in all_parameters}

# Pool every company's values for each parameter into one flat list.
for param in all_parameters:
    parameter_data = data["Quarterly"][param]  # Per-company data for this parameter

    combined_values = []
    for company, values in parameter_data.items():
        if isinstance(values, list):  # A bare list is appended directly
            combined_values.extend(values)
        elif isinstance(values, dict):  # A dict contributes each of its value lists
            for key, val_list in values.items():
                combined_values.extend(val_list)

    param_data[param] = combined_values

# Convert to DataFrame. Wrapping each list in a Series pads shorter columns
# with NaN so columns of different lengths can share one frame.
df = pd.DataFrame(dict([(key, pd.Series(value)) for key, value in param_data.items()]))

# Define independent and dependent variables
independent_vars = [
    "Net sales/income from operations", "Total income from operations", "Employees cost",
    "depreciat", "Other expenses", "P/l before other inc. , int., excpt. items & tax",
    "Other income", "P/l before int., excpt. items & tax", "Interest",
    "P/l before exceptional items & tax", "Tax"
]
dependent_var = "Net profit/(loss) for the period"

# Check if the dataset contains all required columns.
# Raise instead of calling exit(): exit() is the interactive-shell helper and
# is not a reliable way to abort a notebook cell, whereas an exception stops
# execution right here and reports the reason.
missing_columns = [col for col in independent_vars + [dependent_var] if col not in df.columns]
if missing_columns:
    raise ValueError(f"Missing columns in dataset: {missing_columns}")

# Coerce to numeric FIRST (non-numeric entries become NaN), then drop every row
# that is incomplete in any feature or in the target. Dropping before coercing
# would let coercion-created NaNs, and missing target values, reach model.fit.
model_frame = (
    df[independent_vars + [dependent_var]]
    .apply(pd.to_numeric, errors="coerce")
    .dropna()
)

# Prepare X (independent) and y (dependent); both share model_frame's index.
X = model_frame[independent_vars]
y = model_frame[dependent_var]

# Features are deliberately left unscaled: the model is fit on raw values and
# predict_net_profit() feeds it raw user input, so a scaler here would have to
# be applied again at prediction time.

# Train-test split (fixed seed so the reported metrics are reproducible)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Train the Multiple Linear Regression Model
model = LinearRegression()
model.fit(X_train, y_train)

# Predictions on the held-out rows
y_pred = model.predict(X_test)

# Model Evaluation
mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)

print("\nModel Training Complete!")
print(f"Mean Squared Error (MSE): {mse:.2f}")
print(f"R² Score: {r2:.2f}")

# Function to take user input and predict net profit
def predict_net_profit():
    """Prompt for one value per independent variable and print a prediction.

    Reads two module-level names: ``independent_vars`` (the prompt order and
    the column names of the one-row frame passed to the model) and ``model``
    (the fitted regressor). Each prompt repeats until the user enters a
    finite number. The entered values and the predicted net profit are
    printed; nothing is returned.
    """
    import math  # local import: only needed for the finiteness check below

    print("\n🔍 Enter values for the independent variables to predict Net Profit:")

    user_input = []
    for feature in independent_vars:
        while True:
            try:
                value = float(input(f"Enter value for '{feature}': "))
            except ValueError:
                print("Invalid input! Please enter a numeric value.")
                continue
            # float() also accepts "nan", "inf" and "infinity"; such values would
            # only fail later inside model.predict, so reject them here.
            if not math.isfinite(value):
                print("Invalid input! Please enter a numeric value.")
                continue
            user_input.append(value)
            break

    # Convert user input into a one-row DataFrame whose columns match the
    # feature names used to select the training columns.
    user_df = pd.DataFrame([user_input], columns=independent_vars)

    # Display user input before making the prediction
    print("\n📋 You have entered the following values:")
    for feature, value in zip(independent_vars, user_input):
        print(f"{feature}: {value}")

    # Predict net profit; predict() returns one value per input row.
    predicted_profit = model.predict(user_df)[0]

    print(f"\n💰 Predicted Net Profit/(Loss) for the given input: {predicted_profit:.2f}")

# Keep offering predictions until the user answers "no".
while True:
    # Normalise once: surrounding whitespace and letter case are ignored.
    answer = input("\nWould you like to predict net profit? (yes/no): ").strip().lower()

    if answer == "no":
        print("🔚 Exiting program.")
        break

    if answer == "yes":
        predict_net_profit()
        continue

    # Anything else is rejected and the question is asked again.
    print("Invalid choice! Please type 'yes' or 'no'.")



Model Training Complete!
Mean Squared Error (MSE): 2423.56
R² Score: 1.00
