In [64]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.preprocessing import StandardScaler

In [65]:
# Read the source CSV (path is relative to the notebook's working directory)
# into the DataFrame that every later cell works from.
dataset = pd.read_csv(filepath_or_buffer='Dataset-for-498-V3.csv')


In [66]:
# Keep only the modelling columns. `.copy()` gives an independent frame so the
# column assignment below cannot trigger pandas' SettingWithCopyWarning.
dataset = dataset[['Duration (Years)', 'Operational Cost', 'Total Fund',
                   'Revenue', 'Profit', 'Net Profit Margin', 'Valuation']].copy()

# 'Net Profit Margin' is stored as text with a trailing percent sign; turn it
# into a fraction (value / 100). The conversion is skipped when the column is
# already numeric, so re-running this cell neither fails on the `.str` accessor
# nor divides by 100 a second time.
# NOTE(review): a CSV that stores the margin as a bare number would be kept
# as-is here -- confirm the source always includes the '%' suffix.
if not pd.api.types.is_numeric_dtype(dataset['Net Profit Margin']):
    margin_text = (
        dataset['Net Profit Margin']
        .str.strip()
        .str.rstrip('%')
        .str.strip()
    )
    # errors='coerce' maps unparseable entries to NaN instead of raising,
    # matching how the remaining columns are converted below.
    dataset['Net Profit Margin'] = pd.to_numeric(margin_text, errors='coerce') / 100

# Convert remaining columns to numeric format; unparseable cells become NaN.
dataset = dataset.apply(pd.to_numeric, errors='coerce')

In [67]:
# Discard every row that has at least one missing value in any column.
dataset = dataset.dropna(axis=0, how='any')

In [68]:
# Target: the 'Valuation' column.
y = dataset['Valuation']
# Features: every column except the target.
X = dataset.drop(columns=['Valuation'])

In [69]:
# Learn per-column standardization statistics from the feature matrix, then
# apply them to obtain the standardized feature array.
scaler = StandardScaler().fit(X)
X_scaled = scaler.transform(X)

In [70]:
# Split the *unscaled* features first (80% train / 20% test, fixed seed for
# reproducibility) so that preprocessing can be learned from training rows only.
X_train_raw, X_test_raw, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Fit the scaler on the training partition alone: the held-out rows must not
# contribute to the mean/variance used for standardization, otherwise test-set
# information leaks into preprocessing. `scaler` is rebound here so that any
# later `scaler.transform(...)` call uses these training-only statistics.
scaler = StandardScaler().fit(X_train_raw)
X_train = scaler.transform(X_train_raw)
X_test = scaler.transform(X_test_raw)


In [71]:
# Linear regression with scikit-learn's default settings, trained on the
# training partition.
model = LinearRegression()
model.fit(X=X_train, y=y_train)

LinearRegression()

In [72]:
# Score the fitted model on the held-out test partition.
y_pred = model.predict(X_test)
r2 = r2_score(y_true=y_test, y_pred=y_pred)
mse = mean_squared_error(y_true=y_test, y_pred=y_pred)
# R-squared rescaled to a percentage; despite the variable name this is not a
# classification accuracy.
accuracy_percentage = 100 * r2
# NOTE(review): the recorded run reports R2 = 1.0 with an MSE near zero.
# Presumably 'Valuation' is an exact linear function of the features; verify
# that no feature column leaks the target before trusting this score.
print("Mean Squared Error (MSE):", mse)
print("R-squared (R2):", r2)
print(f"R-squared (R2): {accuracy_percentage:.2f}%")

# Prediction example
# Raw (unscaled) input values for a specific company.
operational_duration = 5
operation_cost = 200000
revenue = 500000
profit = 100000
net_profit_margin = 0.2  # a fraction, like the training column (percent / 100)
total_fund = 1032222222

# Build the input as a one-row DataFrame keyed by the training column names,
# then reorder it to X's column order. This removes the silent dependency on
# positional order and keeps feature names attached, so the scaler does not
# warn about missing feature names. A missing/misspelled column raises KeyError.
company_features = pd.DataFrame([{
    'Duration (Years)': operational_duration,
    'Operational Cost': operation_cost,
    'Total Fund': total_fund,
    'Revenue': revenue,
    'Profit': profit,
    'Net Profit Margin': net_profit_margin,
}])[list(X.columns)]

# Scale the input values with the same scaler used for the training features
input_values = scaler.transform(company_features)

# Make the prediction (a one-element array, one value per input row)
predicted_valuation = model.predict(input_values)

print("Predicted Valuation:", predicted_valuation)


Mean Squared Error (MSE): 2.1165659285990456e-19
R-squared (R2): 1.0
R-squared (R2): 100.00%
Predicted Valuation: [1.03243391e+09]


