In [1]:
# ================================
# 08 REDUCED REGRESSION MODEL
# ================================
# Setup for this step: load the training CSV, drop the `v.id` column and
# build a reproducible 75/25 train/test split for the models below.

library(readr)
library(dplyr)

# Project root. It used to be hard-coded to one machine; it can now be
# overridden with the FINAL_PROJECT_DIR environment variable. When the
# variable is unset or empty the original location is used, so existing
# runs behave exactly as before.
default_project_dir <- "C:/Users/Graf David/R/FinalProject"
project_dir <- Sys.getenv("FINAL_PROJECT_DIR", unset = default_project_dir)
if (!nzchar(project_dir)) {
  project_dir <- default_project_dir
}

# The working directory is still changed, so that any relative paths used
# elsewhere in the notebook keep resolving against the project root.
setwd(project_dir)

df <- read_csv("dataset/train.csv", show_col_types = FALSE)

# `v.id` is excluded from the modelling data
df_model <- df %>% select(-`v.id`)

# Fixed seed so the random split is the same on every run
set.seed(42)

# 75% of the rows (rounded down) go to training, the rest to testing
sample_size <- floor(0.75 * nrow(df_model))
train_index <- sample(seq_len(nrow(df_model)), size = sample_size)

train_data <- df_model[train_index, ]
test_data  <- df_model[-train_index, ]



Attaching package: ‘dplyr’

The following objects are masked from ‘package:stats’:

    filter, lag

The following objects are masked from ‘package:base’:

    intersect, setdiff, setequal, union



In [2]:
# Reduced linear model: `current price` regressed on five predictors
# (`on road old`, `on road now`, km, condition, years), fitted on the
# training split only.
reduced_model <- lm(
  formula = `current price` ~
    `on road old` + `on road now` + km + condition + years,
  data = train_data
)


In [3]:
# Show the summary of the fitted reduced model; as the only expression in
# the cell, it is displayed through the notebook's automatic printing.
summary(reduced_model)


In [4]:
# Refit the full model (all remaining columns as predictors) on the same
# training split, so the comparison with the reduced model is like for like.
full_model <- lm(formula = `current price` ~ ., data = train_data)

# Plain and adjusted R-squared of the full model
r2_full  <- summary(full_model)[["r.squared"]]
adj_full <- summary(full_model)[["adj.r.squared"]]

# Plain and adjusted R-squared of the reduced model
r2_red   <- summary(reduced_model)[["r.squared"]]
adj_red  <- summary(reduced_model)[["adj.r.squared"]]

# Section banner; sep = "" keeps the emitted text free of extra spaces
cat(
  "\n==============================\n",
  "MODEL COMPARISON\n",
  "==============================\n",
  sep = ""
)

# R-squared of both models
cat("Full R-squared:     ", r2_full, "\n")
cat("Reduced R-squared: ", r2_red, "\n")

# Adjusted R-squared of both models
cat("Full Adj R²:       ", adj_full, "\n")
cat("Reduced Adj R²:    ", adj_red, "\n")



MODEL COMPARISON
Full R-squared:      0.9953061 
Reduced R-squared:  0.9952688 
Full Adj R²:        0.9952425 
Reduced Adj R²:     0.995237 


In [5]:
# Banner followed by the formula of the fitted reduced model.
# sep = "" keeps the emitted banner text free of extra spaces.
cat(
  "\n==============================\n",
  "FINAL REDUCED MODEL FORMULA\n",
  "==============================\n",
  sep = ""
)

print(formula(reduced_model))



FINAL REDUCED MODEL FORMULA
`current price` ~ `on road old` + `on road now` + km + condition + 
    years


In [6]:
# ================================
# FULL SUMMARY — STEP 08
# ================================
# Final report for the reduced model: formula, coefficient table and the
# two R-squared values computed earlier in the notebook.

# Opening banner; sep = "" keeps the emitted text free of extra spaces
cat(
  "\n==============================\n",
  "FULL REDUCED MODEL SUMMARY\n",
  "==============================\n",
  sep = ""
)

cat("\n--- MODEL FORMULA ---\n")
print(formula(reduced_model))

# Coefficient matrix taken from the model summary
cat("\n--- COEFFICIENTS ---\n")
print(summary(reduced_model)[["coefficients"]])

# r2_red and adj_red are defined in the model comparison cell
cat("\n--- MODEL QUALITY ---\n")
cat("Reduced R-squared: ", r2_red, "\n")
cat("Reduced Adj R²:    ", adj_red, "\n")

# Closing banner
cat(
  "\n==============================\n",
  "END OF STEP 08\n",
  "==============================\n",
  sep = ""
)



FULL REDUCED MODEL SUMMARY

--- MODEL FORMULA ---
`current price` ~ `on road old` + `on road now` + km + condition + 
    years

--- COEFFICIENTS ---
                   Estimate   Std. Error     t value      Pr(>|t|)
(Intercept)   -1.322787e+04 5.677044e+03   -2.330063  2.006941e-02
`on road old`  5.051809e-01 5.398686e-03   93.574787  0.000000e+00
`on road now`  5.015764e-01 5.547452e-03   90.415636  0.000000e+00
km            -4.002298e+00 1.087900e-02 -367.892068  0.000000e+00
condition      4.608545e+03 1.114214e+02   41.361404 4.847929e-195
years         -1.610960e+03 1.848239e+02   -8.716189  1.857632e-17

--- MODEL QUALITY ---
Reduced R-squared:  0.9952688 
Reduced Adj R²:     0.995237 

END OF STEP 08
