<a href="https://colab.research.google.com/github/SatoruGojo9/TDS-ASSAIGNMENT/blob/main/Untitled0.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
# Demand Response Curve Analysis using Simple Linear Regression
# Google Colab Code

# Step 1: Install required libraries (if needed)
# Most libraries are pre-installed in Google Colab

# Step 2: Import necessary libraries
import pandas as pd
import numpy as np
import statsmodels.formula.api as smf
from google.colab import files

print("=" * 60)
print("DEMAND RESPONSE CURVE ANALYSIS")
print("=" * 60)

# Step 3: Upload the Excel file
print("\n📁 Please upload your 'data_transform.xlsx' file...")
uploaded = files.upload()  # dict: uploaded file name -> raw file bytes

# Step 4: Read the Excel file
# Parse the bytes handed back by the upload widget instead of re-opening a
# hard-coded path. If the workbook was uploaded under a different name, or an
# older copy is still lying around from a previous run, reading
# 'data_transform.xlsx' from disk would fail or silently analyse stale data.
import io

print("\n📊 Reading the dataset...")
DATA_FILE = 'data_transform.xlsx'
if DATA_FILE in uploaded:
    df = pd.read_excel(io.BytesIO(uploaded[DATA_FILE]))
elif uploaded:
    # A differently named workbook was uploaded: use the first one received.
    uploaded_name = next(iter(uploaded))
    print(f"Note: '{DATA_FILE}' was not uploaded; reading '{uploaded_name}' instead.")
    df = pd.read_excel(io.BytesIO(uploaded[uploaded_name]))
else:
    # Nothing was uploaded (e.g. the dialog was cancelled): fall back to a
    # copy that is already on disk, which is what this cell did originally.
    df = pd.read_excel(DATA_FILE)

# Display basic information about the dataset
print("\n" + "=" * 60)
print("DATASET INFORMATION")
print("=" * 60)
print(f"\nDataset shape: {df.shape}")
print(f"\nColumn names: {df.columns.tolist()}")
print("\nFirst few rows:")
print(df.head())
print("\nDataset summary:")
print(df.describe())

# Step 5: Check for missing values
print("\n" + "=" * 60)
print("MISSING VALUES CHECK")
print("=" * 60)
print(df.isnull().sum())

# Step 6: Fit Simple Linear Regression Model
# The first column is used as the independent variable (Price) and the second
# as the dependent variable (Sales); reorder the sheet if yours differs.
print("\n" + "=" * 60)
print("FITTING LINEAR REGRESSION MODEL")
print("=" * 60)

# Display column names to help identify the correct variables
print(f"\nAvailable columns: {df.columns.tolist()}")


def _formula_term(column_name):
    """Return `column_name` in a form that is safe inside an OLS formula.

    Formula terms are evaluated as Python expressions, so a header such as
    "Unit Price" (or a Python keyword) cannot be written bare. Plain
    identifiers are returned unchanged; anything else is wrapped in the
    formula built-in Q('...'), which looks the column up by its literal name.
    """
    import keyword

    name = str(column_name)
    if name.isidentifier() and not keyword.iskeyword(name):
        return name
    return f"Q({name!r})"


# Determine column names (assuming first column is independent, second is dependent)
col_names = df.columns.tolist()
if len(col_names) >= 2:
    x_var = col_names[0]  # Price (independent variable)
    y_var = col_names[1]  # Sales (dependent variable)

    print(f"\nUsing '{x_var}' as independent variable (Price)")
    print(f"Using '{y_var}' as dependent variable (Sales)")

    # Create formula for OLS (column names are quoted only when necessary, so
    # simple headers still give the familiar "Sales ~ Price")
    formula = f'{_formula_term(y_var)} ~ {_formula_term(x_var)}'
    print(f"\nFormula: {formula}")

    # Fit the model using statsmodels
    model = smf.ols(formula=formula, data=df).fit()

    # Display full model summary
    print("\n" + "=" * 60)
    print("MODEL SUMMARY")
    print("=" * 60)
    print(model.summary())

    # Step 7: Extract and display answers
    print("\n" + "=" * 60)
    print("ANSWERS TO QUESTIONS")
    print("=" * 60)

    # Question 1: R-square value
    r_squared = round(model.rsquared, 2)
    print(f"\n1) R-square value: {r_squared}")

    # Question 2: Is the model significant?
    p_value = model.f_pvalue
    print(f"\n2) Is the model significant?")
    print(f"   F-statistic p-value: {p_value:.6f}")
    if p_value < 0.05:
        print("   Answer: A. Yes (p-value < 0.05)")
    else:
        print("   Answer: B. No (p-value >= 0.05)")

    # Question 3: Intercept value
    intercept = round(model.params['Intercept'], 2)
    print(f"\n3) Intercept value: {intercept}")

    # Question 4: Slope value
    # The model has a single regressor, so the slope is the only parameter
    # besides the intercept. Selecting it this way keeps working when the
    # term had to be quoted and its label is no longer the raw column name.
    slope = round(model.params.drop('Intercept').iloc[0], 2)
    print(f"\n4) Slope value: {slope}")

    # Additional information
    print("\n" + "=" * 60)
    print("ADDITIONAL MODEL STATISTICS")
    print("=" * 60)
    print(f"Adjusted R-squared: {round(model.rsquared_adj, 4)}")
    print(f"F-statistic: {round(model.fvalue, 4)}")
    print(f"F-statistic p-value: {model.f_pvalue:.6e}")
    print(f"AIC: {round(model.aic, 4)}")
    print(f"BIC: {round(model.bic, 4)}")

    # Coefficient details
    print("\n" + "=" * 60)
    print("COEFFICIENT DETAILS")
    print("=" * 60)
    print("\nParameter Estimates:")
    print(model.params)
    print("\nStandard Errors:")
    print(model.bse)
    print("\nT-statistics:")
    print(model.tvalues)
    print("\nP-values:")
    print(model.pvalues)

    print("\n" + "=" * 60)
    print("ANALYSIS COMPLETE!")
    print("=" * 60)

else:
    print("\n⚠️ Error: Dataset should have at least 2 columns (Price and Sales)")
    print("Please check your data file.")

DEMAND RESPONSE CURVE ANALYSIS

📁 Please upload your 'data_transform.xlsx' file...


Saving data_transform.xlsx to data_transform.xlsx

📊 Reading the dataset...

DATASET INFORMATION

Dataset shape: (50, 2)

Column names: ['Price', 'Sales']

First few rows:
   Price  Sales
0   2.20   68.9
1   7.48   15.6
2   7.26   19.5
3   3.08   35.1
4   8.14   10.4

Dataset summary:
           Price      Sales
count  50.000000   50.00000
mean    5.024800   41.02800
std     2.165317   37.39749
min     1.760000    7.80000
25%     2.915000   18.52500
50%     5.170000   26.00000
75%     7.205000   52.97500
max     8.140000  188.50000

MISSING VALUES CHECK
Price    0
Sales    0
dtype: int64

FITTING LINEAR REGRESSION MODEL

Available columns: ['Price', 'Sales']

Using 'Price' as independent variable (Price)
Using 'Sales' as dependent variable (Sales)

Formula: Sales ~ Price

MODEL SUMMARY
                            OLS Regression Results                            
Dep. Variable:                  Sales   R-squared:                       0.526
Model:                            OLS   Ad

In [2]:
# Natural Log Transformation Analysis
# Run this code in a NEW cell after the previous analysis

print("=" * 60)
print("NATURAL LOG TRANSFORMATION ANALYSIS")
print("=" * 60)

# Step 1: Apply natural log transformation to both variables
print("\n📊 Applying natural log transformation...")

# The natural log is only defined for strictly positive numbers. Without this
# check, zeros or negatives would become -inf / NaN and surface later as a
# confusing regression failure (or as rows silently missing from the fit).
non_positive = (df[['Price', 'Sales']] <= 0).any()
if non_positive.any():
    offending = non_positive[non_positive].index.tolist()
    raise ValueError(
        f"Natural log requires strictly positive values; found values <= 0 in: {offending}"
    )

# Create new columns with log-transformed values (re-running this cell simply
# recomputes the same two columns from the untouched originals)
df['log_Price'] = np.log(df['Price'])
df['log_Sales'] = np.log(df['Sales'])

print("\nTransformed dataset (first few rows):")
print(df[['Price', 'Sales', 'log_Price', 'log_Sales']].head())

print("\nTransformed dataset summary:")
print(df[['log_Price', 'log_Sales']].describe())

# Step 2: Regress log(Sales) on log(Price)
print("\n" + "=" * 60, "FITTING LINEAR REGRESSION ON TRANSFORMED DATA", "=" * 60, sep="\n")

# Both sides of the formula refer to the log columns added to `df` above
formula_log = 'log_Sales ~ log_Price'
print(f"\nFormula: {formula_log}")

# Ordinary least squares via the statsmodels formula interface
model_log = smf.ols(formula_log, df).fit()

# Full regression table for the log-log model
print("\n" + "=" * 60, "MODEL SUMMARY (LOG-TRANSFORMED)", "=" * 60, sep="\n")
print(model_log.summary())

# Step 3: Extract and display answers for questions 5 and 6
print("\n" + "=" * 60)
print("ANSWERS TO QUESTIONS 5 & 6")
print("=" * 60)

# Question 5: R-square value on transformed dataset
r_squared_log = round(model_log.rsquared, 2)
print(f"\n5) R-square value (transformed dataset): {r_squared_log}")

# Question 6: Is the model significant?
p_value_log = model_log.f_pvalue
print(f"\n6) Is the model significant?")
print(f"   F-statistic p-value: {p_value_log:.6f}")
if p_value_log < 0.05:
    print("   Answer: A. Yes (p-value < 0.05)")
else:
    print("   Answer: B. No (p-value >= 0.05)")

# Additional comparison information
# NOTE: `model` must still be the statsmodels fit from the first analysis cell.
# A later cell in this notebook rebinds `model` to a scikit-learn estimator, so
# re-running this cell after that one requires re-running the first cell too.
print("\n" + "=" * 60)
print("MODEL COMPARISON: ORIGINAL vs TRANSFORMED")
print("=" * 60)
print(f"\nOriginal Model R-squared: {round(model.rsquared, 4)}")
print(f"Log-Transformed Model R-squared: {round(model_log.rsquared, 4)}")
# The difference between two R-squared values, scaled by 100, is a number of
# percentage points - not a relative (percent) improvement - so label it so.
r_squared_gain_points = round((model_log.rsquared - model.rsquared) * 100, 2)
print(f"\nImprovement: {r_squared_gain_points} percentage points")

print("\n" + "=" * 60)
print("LOG-TRANSFORMED ANALYSIS COMPLETE!")
print("=" * 60)

NATURAL LOG TRANSFORMATION ANALYSIS

📊 Applying natural log transformation...

Transformed dataset (first few rows):
   Price  Sales  log_Price  log_Sales
0   2.20   68.9   0.788457   4.232656
1   7.48   15.6   2.012233   2.747271
2   7.26   19.5   1.982380   2.970414
3   3.08   35.1   1.124930   3.558201
4   8.14   10.4   2.096790   2.341806

Transformed dataset summary:
       log_Price  log_Sales
count  50.000000  50.000000
mean    1.507006   3.403014
std     0.489688   0.775953
min     0.565314   2.054124
25%     1.069349   2.918670
50%     1.642646   3.258097
75%     1.974687   3.969763
max     2.096790   5.239098

FITTING LINEAR REGRESSION ON TRANSFORMED DATA

Formula: log_Sales ~ log_Price

MODEL SUMMARY (LOG-TRANSFORMED)
                            OLS Regression Results                            
Dep. Variable:              log_Sales   R-squared:                       0.760
Model:                            OLS   Adj. R-squared:                  0.755
Method:              

In [3]:
# Extract Intercept and Slope from Log-Transformed Model
# Run this code in a NEW cell after the log transformation analysis

print("=" * 60, "LOG-TRANSFORMED MODEL COEFFICIENTS", "=" * 60, sep="\n")

# Question 7: intercept of the log-log fit, rounded for the answer sheet
intercept_log = round(model_log.params['Intercept'], 2)
print(f"\n7) Intercept value (log-transformed model): {intercept_log}")

# Question 8: slope on log_Price, rounded for the answer sheet
slope_log = round(model_log.params['log_Price'], 2)
print(f"\n8) Slope value (log-transformed model): {slope_log}")

# Everything below is supporting detail for the two answers above
print("\n" + "=" * 60, "DETAILED COEFFICIENT INFORMATION", "=" * 60, sep="\n")

print("\nFull Parameter Estimates:")
print(model_log.params)

print("\nRounded to 2 decimal places:")
print(f"  Intercept: {intercept_log}")
print(f"  Slope (log_Price): {slope_log}")

# Remaining per-coefficient statistics, each printed under its own heading
for heading, statistic in (
    ("\nStandard Errors:", model_log.bse),
    ("\nT-statistics:", model_log.tvalues),
    ("\nP-values:", model_log.pvalues),
    ("\n95% Confidence Intervals:", model_log.conf_int()),
):
    print(heading)
    print(statistic)

# Interpretation
print("\n" + "=" * 60)
print("INTERPRETATION")
print("=" * 60)
print(f"\nRegression Equation: log(Sales) = {intercept_log} + ({slope_log}) × log(Price)")
print(f"\nInterpretation:")
print(f"  - Intercept ({intercept_log}): When log(Price) = 0 (i.e., Price = 1), log(Sales) = {intercept_log}")
print(f"  - Slope ({slope_log}): This represents the price elasticity of demand")
# The magnitude is reported with abs(), so the wording has to carry the sign:
# a negative slope means Sales fall as Price rises, a positive one that they rise.
sales_direction = "decrease" if slope_log < 0 else "increase"
print(f"    A 1% increase in Price leads to approximately a {abs(slope_log)}% {sales_direction} in Sales")

print("\n" + "=" * 60)
print("COEFFICIENT EXTRACTION COMPLETE!")
print("=" * 60)

LOG-TRANSFORMED MODEL COEFFICIENTS

7) Intercept value (log-transformed model): 5.48

8) Slope value (log-transformed model): -1.38

DETAILED COEFFICIENT INFORMATION

Full Parameter Estimates:
Intercept    5.484836
log_Price   -1.381429
dtype: float64

Rounded to 2 decimal places:
  Intercept: 5.48
  Slope (log_Price): -1.38

Standard Errors:
Intercept    0.177370
log_Price    0.112043
dtype: float64

T-statistics:
Intercept    30.923102
log_Price   -12.329464
dtype: float64

P-values:
Intercept    2.371186e-33
log_Price    1.738583e-16
dtype: float64

95% Confidence Intervals:
                  0         1
Intercept  5.128210  5.841463
log_Price -1.606707 -1.156152

INTERPRETATION

Regression Equation: log(Sales) = 5.48 + (-1.38) × log(Price)

Interpretation:
  - Intercept (5.48): When log(Price) = 0 (i.e., Price = 1), log(Sales) = 5.48
  - Slope (-1.38): This represents the price elasticity of demand
    A 1% increase in Price leads to approximately a 1.38% decrease in Sales

COEFFI

In [4]:
# Pandya Motors - Linear Regression Model for Profit Prediction
# Google Colab Code

# Step 1: Import necessary libraries
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import r2_score, mean_squared_error, mean_absolute_error
from google.colab import files

print("=" * 60)
print("PANDYA MOTORS - PROFIT PREDICTION MODEL")
print("=" * 60)


def _read_uploaded_excel(uploaded, expected_name):
    """Load a workbook from a `files.upload()` result.

    Args:
        uploaded: dict mapping uploaded file names to their raw bytes.
        expected_name: the file name the notebook asks the user to upload.

    Returns:
        A DataFrame parsed from the uploaded bytes. The entry called
        `expected_name` is preferred; otherwise the first uploaded file is
        used. If nothing was uploaded, `expected_name` is read from disk,
        which is what this cell did originally.
    """
    import io

    if expected_name in uploaded:
        return pd.read_excel(io.BytesIO(uploaded[expected_name]))
    if uploaded:
        actual_name = next(iter(uploaded))
        print(f"Note: '{expected_name}' was not uploaded; reading '{actual_name}' instead.")
        return pd.read_excel(io.BytesIO(uploaded[actual_name]))
    return pd.read_excel(expected_name)


# Step 2: Upload the Excel files
print("\n📁 Please upload your 'X.xlsx' file (feature matrix)...")
uploaded_X = files.upload()

print("\n📁 Please upload your 'y.xlsx' file (target variable)...")
uploaded_y = files.upload()

# Step 3: Read the Excel files
# Parse the uploaded bytes rather than fixed paths, so a renamed upload or a
# stale copy left on disk by an earlier run cannot be picked up by mistake.
print("\n📊 Reading the datasets...")
X = _read_uploaded_excel(uploaded_X, 'X.xlsx')
y = _read_uploaded_excel(uploaded_y, 'y.xlsx')

# Display basic information about the datasets
print("\n" + "=" * 60)
print("DATASET INFORMATION")
print("=" * 60)

print(f"\nFeature Matrix (X) shape: {X.shape}")
print(f"Target Variable (y) shape: {y.shape}")

print("\nFeature Matrix (X) - First few rows:")
print(X.head())

print("\nFeature Matrix (X) - Column names:")
print(X.columns.tolist())

print("\nTarget Variable (y) - First few rows:")
print(y.head())

print("\nFeature Matrix (X) - Summary Statistics:")
print(X.describe())

print("\nTarget Variable (y) - Summary Statistics:")
print(y.describe())

# Check for missing values
print("\n" + "=" * 60)
print("MISSING VALUES CHECK")
print("=" * 60)
print("\nMissing values in X:")
print(X.isnull().sum())
print("\nMissing values in y:")
print(y.isnull().sum())

# Step 4: Train-Test Split
print("\n" + "=" * 60)
print("TRAIN-TEST SPLIT")
print("=" * 60)
print("\nSplitting data with test_size=0.2, random_state=0...")

# A fixed random_state keeps the split (and every number below) reproducible
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)

print(f"\nTraining set size: {X_train.shape[0]} samples")
print(f"Test set size: {X_test.shape[0]} samples")
print(f"Number of features: {X_train.shape[1]}")

# Step 5: Build Linear Regression Model
print("\n" + "=" * 60)
print("BUILDING LINEAR REGRESSION MODEL")
print("=" * 60)

# Initialize the model
# NOTE: this rebinds `model`, which the earlier demand-curve cells use for
# their statsmodels fit; re-run those cells before revisiting their results.
model = LinearRegression()

# Fit the model on training data
print("\nFitting the model on training data...")
model.fit(X_train, y_train)
print("✓ Model training complete!")

# Step 6: Make Predictions
print("\n" + "=" * 60)
print("MAKING PREDICTIONS")
print("=" * 60)

# Predict on test set
y_pred = model.predict(X_test)
print("✓ Predictions on test set complete!")

# Step 7: Model Evaluation
print("\n" + "=" * 60)
print("MODEL EVALUATION METRICS")
print("=" * 60)

# Calculate R² score on the held-out test set
r2 = r2_score(y_test, y_pred)
r2_rounded = round(r2, 2)

print(f"\nR² Score (Coefficient of Determination): {r2}")
print(f"R² Score (rounded to 2 decimals): {r2_rounded}")

# Additional metrics
mse = mean_squared_error(y_test, y_pred)
rmse = np.sqrt(mse)
mae = mean_absolute_error(y_test, y_pred)

print(f"\nMean Squared Error (MSE): {mse:.2f}")
print(f"Root Mean Squared Error (RMSE): {rmse:.2f}")
print(f"Mean Absolute Error (MAE): {mae:.2f}")

# Training score, shown next to the test score to expose any over-fitting
r2_train = model.score(X_train, y_train)
print(f"\nR² Score on Training Set: {r2_train:.4f}")
print(f"R² Score on Test Set: {r2:.4f}")

# Step 8: Report the fitted intercept and one coefficient per feature column
print("\n" + "=" * 60, "MODEL COEFFICIENTS", "=" * 60, sep="\n")

print(f"\nIntercept: {model.intercept_}")

print("\nCoefficients for each feature:")
# Pair every feature name with its (flattened) coefficient, one row each
coefficients_df = pd.DataFrame(
    list(zip(X.columns, model.coef_.flatten())),
    columns=['Feature', 'Coefficient'],
)
print(coefficients_df)

# Step 9: Put the first ten test-set targets next to their predictions
print("\n" + "=" * 60, "SAMPLE PREDICTIONS vs ACTUAL", "=" * 60, sep="\n")

comparison_df = pd.DataFrame({
    'Actual': y_test.values.flatten()[:10],
    'Predicted': y_pred.flatten()[:10],
}).assign(Difference=lambda frame: frame['Actual'] - frame['Predicted'])
print("\nFirst 10 predictions:")
print(comparison_df)

# Step 10: Answer to Question 9
# Reports the test-set R² computed above, rounded to two decimals.
print("\n" + "=" * 60)
print("ANSWER TO QUESTION 9")
print("=" * 60)
print(f"\n9) What is the R² of the model?")
print(f"   Answer: {r2_rounded}")

print("\n" + "=" * 60)
print("ANALYSIS COMPLETE!")
print("=" * 60)

PANDYA MOTORS - PROFIT PREDICTION MODEL

📁 Please upload your 'X.xlsx' file (feature matrix)...


Saving X.xlsx to X.xlsx

📁 Please upload your 'y.xlsx' file (target variable)...


Saving y.xlsx to y.xlsx

📊 Reading the datasets...

DATASET INFORMATION

Feature Matrix (X) shape: (50, 5)
Target Variable (y) shape: (50, 1)

Feature Matrix (X) - First few rows:
   Safety Features  Tech Features  Marketing Spend  Premium Hatchback  SUV
0        175349.20      116897.80        491784.10                  0    1
1        172597.70      131377.59        463898.53                  0    0
2        163441.51       81145.55        427934.54                  1    0
3        154372.41       98671.85        403199.62                  0    1
4        152107.34       71391.77        386168.42                  1    0

Feature Matrix (X) - Column names:
['Safety Features', 'Tech Features', 'Marketing Spend', 'Premium Hatchback', 'SUV']

Target Variable (y) - First few rows:
      Profit
0  227261.83
1  226792.06
2  226050.39
3  217901.99
4  201187.94

Feature Matrix (X) - Summary Statistics:
       Safety Features  Tech Features  Marketing Spend  Premium Hatchback  \
count      

In [5]:
# Pandya Motors - Intercept and Custom Prediction (Questions 10 & 11)
# Run this code in a NEW cell after the previous analysis

print("=" * 60, "QUESTIONS 10 & 11: INTERCEPT AND PREDICTION", "=" * 60, sep="\n")

# Question 10: read the intercept off the fitted regression
print("\n" + "=" * 60, "QUESTION 10: INTERCEPT VALUE", "=" * 60, sep="\n")

# `intercept_` is indexed here, so take its first (only reported) entry
intercept_value = model.intercept_[0]
intercept_rounded = round(intercept_value, 2)

print(
    f"\nIntercept (full precision): {intercept_value}",
    f"Intercept (rounded to 2 decimals): {intercept_rounded}",
    sep="\n",
)

print("\n10) What is the value of the intercept?")
print(f"    Answer: {intercept_rounded}")

# Question 11: Predict profit for given values
print("\n" + "=" * 60)
print("QUESTION 11: CUSTOM PREDICTION")
print("=" * 60)

# Given values
X1 = 1315.46      # Safety Features
X2 = 115816.21    # Tech Features
X3 = 297114.46    # Marketing Spend
X4 = 1            # Premium Hatchback
X5 = 0            # SUV

print("\nGiven Input Values:")
print(f"  X1 (Safety Features): {X1}")
print(f"  X2 (Tech Features): {X2}")
print(f"  X3 (Marketing Spend): {X3}")
print(f"  X4 (Premium Hatchback): {X4}")
print(f"  X5 (SUV): {X5}")

# Create input array for prediction
# Feature order: Safety Features, Tech Features, Marketing Spend, Premium Hatchback, SUV
input_data = np.array([[X1, X2, X3, X4, X5]])

print("\nInput array for prediction:")
print(input_data)

# The model was fitted on a DataFrame, so it remembers its column names.
# Passing a bare array makes scikit-learn warn that the input has no valid
# feature names; labelling the row with the fitted names avoids that and keeps
# the column alignment explicit. Estimators without recorded names (or with a
# different number of features) receive the plain array exactly as before.
fitted_feature_names = getattr(model, "feature_names_in_", None)
if fitted_feature_names is not None and len(fitted_feature_names) == input_data.shape[1]:
    prediction_input = pd.DataFrame(input_data, columns=fitted_feature_names)
else:
    prediction_input = input_data

# Make prediction using the trained model
predicted_profit = model.predict(prediction_input)
predicted_profit_value = predicted_profit[0][0]
predicted_profit_rounded = round(predicted_profit_value, 2)

print("\n" + "=" * 60)
print("PREDICTION RESULT")
print("=" * 60)

print(f"\nPredicted Profit (full precision): {predicted_profit_value}")
print(f"Predicted Profit (rounded to 2 decimals): {predicted_profit_rounded}")

# Manual calculation for verification
# Rebuilds the prediction by hand (intercept plus coefficient × input for each
# feature) and checks that it agrees with model.predict().
print("\n" + "=" * 60)
print("MANUAL CALCULATION VERIFICATION")
print("=" * 60)

print("\nRegression Equation:")
print("Profit = Intercept + (coef1 × X1) + (coef2 × X2) + (coef3 × X3) + (coef4 × X4) + (coef5 × X5)")

# One (label, coefficient, input value) triple per feature, in model order,
# so the coefficient listing, the sum and the step-by-step print share data.
feature_terms = list(zip(
    ["Safety Features", "Tech Features", "Marketing Spend", "Premium Hatchback", "SUV"],
    model.coef_[0],
    [X1, X2, X3, X4, X5],
))

print("\nCoefficients:")
print(f"  Intercept: {intercept_value:.6f}")
for position, (label, coefficient, _) in enumerate(feature_terms, start=1):
    print(f"  {label} (coef{position}): {coefficient:.6f}")

# Accumulate left to right, starting from the intercept
manual_calculation = intercept_value
for _, coefficient, input_value in feature_terms:
    manual_calculation = manual_calculation + coefficient * input_value

print(f"\nStep-by-step calculation:")
print(f"  Profit = {intercept_value:.2f}")
for _, coefficient, input_value in feature_terms:
    print(f"         + ({coefficient:.6f} × {input_value})")
print(f"         = {manual_calculation:.2f}")

# Agreement to within one cent is treated as a match
print(f"\nVerification: Model prediction matches manual calculation: {abs(predicted_profit_value - manual_calculation) < 0.01}")

# Recap of both answers computed earlier in this cell
print("\n" + "=" * 60, "FINAL ANSWERS", "=" * 60, sep="\n")

print("\n10) What is the value of the intercept?")
print(f"    Answer: {intercept_rounded}")

# Echo the inputs in the question so the answer can be read on its own
question_11 = f"\n11) What is the predicted profit for X1={X1}, X2={X2}, X3={X3}, X4={X4}, X5={X5}?"
print(question_11)
print(f"    Answer: {predicted_profit_rounded}")

print("\n" + "=" * 60, "COMPLETE!", "=" * 60, sep="\n")

QUESTIONS 10 & 11: INTERCEPT AND PREDICTION

QUESTION 10: INTERCEPT VALUE

Intercept (full precision): 69744.98712385244
Intercept (rounded to 2 decimals): 69744.99

10) What is the value of the intercept?
    Answer: 69744.99

QUESTION 11: CUSTOM PREDICTION

Given Input Values:
  X1 (Safety Features): 1315.46
  X2 (Tech Features): 115816.21
  X3 (Marketing Spend): 297114.46
  X4 (Premium Hatchback): 1
  X5 (SUV): 0

Input array for prediction:
[[1.3154600e+03 1.1581621e+05 2.9711446e+05 1.0000000e+00 0.0000000e+00]]

PREDICTION RESULT

Predicted Profit (full precision): 84489.10563629777
Predicted Profit (rounded to 2 decimals): 84489.11

MANUAL CALCULATION VERIFICATION

Regression Equation:
Profit = Intercept + (coef1 × X1) + (coef2 × X2) + (coef3 × X3) + (coef4 × X4) + (coef5 × X5)

Coefficients:
  Intercept: 69744.987124
  Safety Features (coef1): 0.773467
  Tech Features (coef2): 0.032885
  Marketing Spend (coef3): 0.036610
  Premium Hatchback (coef4): -959.284160
  SUV (coef

