In [0]:
%pip install xgboost streamlit


In [0]:
# Cell 1: Import libraries
import pandas as pd
import numpy as np
from sklearn.preprocessing import LabelEncoder, StandardScaler
import warnings
warnings.filterwarnings('ignore')
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline
plt.style.use('seaborn-v0_8')
sns.set_palette("husl")
import joblib
import os
from sklearn.model_selection import train_test_split
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, precision_score, recall_score, roc_auc_score, f1_score
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from sklearn.linear_model import LogisticRegression, LinearRegression
from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor
from xgboost import XGBClassifier, XGBRegressor
import mlflow
import mlflow.sklearn
import mlflow.xgboost
import streamlit as st
import matplotlib.pyplot as plt


In [0]:
# Load the raw housing listings; every later cell works on this frame.
df = pd.read_csv("./india_housing_prices.csv")

# Report (rows, columns) before previewing the first records.
n_rows, n_cols = df.shape
print((n_rows, n_cols))
df.head()

In [0]:
# Cell 2: data-quality overview

# Number of null entries in each column.
missing_per_column = df.isna().sum()
print(missing_per_column)

# Number of rows that exactly repeat an earlier row.
duplicate_row_count = df.duplicated().sum()
print(duplicate_row_count)


In [0]:
# =============================================================================
# CATEGORICAL ENCODING, part 1: declare the columns that will be label-encoded
# =============================================================================

# Every text column that receives an integer-encoded twin in the next cell.
categorical_columns = [
    'State',
    'City',
    'Locality',                        # This is "Location"
    'Property_Type',                   # Apartment, Villa, etc.
    'Furnished_Status',                # Unfurnished, Semi, Fully
    'Facing',                          # North, East, etc.
    'Owner_Type',                      # Builder, Individual
    'Availability_Status',             # Ready to Move, Under Construction
    'Security',                        # Gated, CCTV, etc.
    'Amenities',                       # Gym,Pool,... (as string)
    'Public_Transport_Accessibility',
]

# Cardinality report: one line per column with its number of distinct values.
print("Encoding the following categorical columns:")
for col in categorical_columns:
    distinct_values = df[col].nunique()
    print(f"  → {col} : {distinct_values} unique values")



In [0]:
# Fitted encoders keyed by source column name; they are pickled in a later cell
# so the Streamlit app can reuse the same category -> integer mapping.
label_encoders = {}

for col in categorical_columns:
    print(f"Encoding {col}...")

    # Cast to str first so every value (including missing ones) is a plain string.
    values_as_text = df[col].astype(str)

    encoder = LabelEncoder()
    df[f"{col}_encoded"] = encoder.fit_transform(values_as_text)
    label_encoders[col] = encoder

# Side-by-side preview of a few raw columns and their encoded counterparts.
preview_columns = [
    'City', 'City_encoded',
    'Property_Type', 'Property_Type_encoded',
    'Locality', 'Locality_encoded',
]
print("\nEncoding Done! Example:")
print(df[preview_columns].head(10))



In [0]:
# Persist the fitted encoders; the Streamlit app needs them at prediction time.
encoders_path = "./label_encoders.pkl"
joblib.dump(label_encoders, encoders_path)
print(f"\nLabel Encoders saved to: {encoders_path}")





In [0]:
# Names of the integer-encoded twins, in the same order as categorical_columns.
encoded_columns = [f"{col}_encoded" for col in categorical_columns]

print("\nUse these columns in your model:")
print(encoded_columns)

In [0]:
# =============================================================================
# STEP: Create New Features + Good_Investment Label
# =============================================================================
# This cell adds engineered columns to `df` in place; the Good_Investment label
# itself is built in the next cell from several of these columns.

# ─────────────────────────────────────────────────────────────────────────────
# 1. NEW FEATURES
# ─────────────────────────────────────────────────────────────────────────────

# Feature 1: Price per SqFt. Price_in_Lakhs is converted to rupees
# (1 lakh = 100,000) before dividing by the area.
df['Price_per_SqFt'] = df['Price_in_Lakhs'] * 100000 / df['Size_in_SqFt']

# Feature 2: Age of Property, measured against a fixed reference year of 2025.
df['Age_of_Property'] = 2025 - df['Year_Built']

# Feature 3: School Density Score.
# Counts are bucketed as 0-1 -> 2, 2-3 -> 4, 4-6 -> 7, 7+ -> 10.
# The last bin is open-ended (np.inf): with a hard upper edge of 10, any count
# above 10 fell outside every bin, became NaN, and made .astype(int) raise.
df['School_Density_Score'] = pd.cut(df['Nearby_Schools'],
                                   bins=[-1, 1, 3, 6, np.inf],
                                   labels=[2, 4, 7, 10]).astype(int)

# Feature 4: Hospital Density Score.
# Counts are bucketed as 0 -> 2, 1-2 -> 5, 3-5 -> 8, 6+ -> 10 (open-ended top
# bin for the same reason as above).
df['Hospital_Density_Score'] = pd.cut(df['Nearby_Hospitals'],
                                     bins=[-1, 0, 2, 5, np.inf],
                                     labels=[2, 5, 8, 10]).astype(int)

# Feature 5: Amenities Count = how many of the keywords below occur as a
# substring of the row's Amenities text.
amenities_keywords = ['Gym', 'Pool', 'Clubhouse', 'Park', 'Lift', 'Power Backup', 'Garden']
df['Amenities_Count'] = df['Amenities'].apply(
    lambda x: sum(1 for word in amenities_keywords if str(word) in str(x))
)

# Feature 6: Security Score (0 to 10). Values not listed in the map score 0.
security_map = {'None': 0, 'CCTV': 3, 'Guard': 5, 'Gated': 7,
                'Gated, CCTV': 8, 'Gated, Guard': 9, 'Gated, CCTV, Guard': 10}
df['Security_Score'] = df['Security'].map(security_map).fillna(0)

# Feature 7: Transport Score. Values not listed in the map get the middle score 5.
transport_map = {'Poor': 2, 'Medium': 5, 'Good': 8, 'Excellent': 10}
df['Transport_Score'] = df['Public_Transport_Accessibility'].map(transport_map).fillna(5)

# Feature 8: Is Tier-1 City? (1 when City is in the list below, else 0)
tier1_cities = ['Mumbai', 'Delhi', 'Bangalore', 'Hyderabad', 'Pune', 'Chennai', 'Kolkata']
df['Is_Tier1_City'] = df['City'].isin(tier1_cities).astype(int)

# Feature 9: Is New Property? (1 when the property is at most 5 years old)
df['Is_New_Property'] = (df['Age_of_Property'] <= 5).astype(int)

# Feature 10: Is Ready to Move?
df['Is_Ready_to_Move'] = (df['Availability_Status'] == 'Ready to Move').astype(int)

print("New Features Created Successfully!")
# Preview only the first rows rather than printing the whole frame.
print(df[['Price_per_SqFt', 'School_Density_Score', 'Amenities_Count', 'Security_Score', 'Is_Tier1_City']].head())




In [0]:
# ─────────────────────────────────────────────────────────────────────────────
# 2. TARGET: Good_Investment (binary 0/1), built from two rule groups
# ─────────────────────────────────────────────────────────────────────────────
#
# A row is labelled 1 only when BOTH hold:
#   a) its Price_per_SqFt is at most 5% above the median for its city, and
#   b) at least one growth signal is present: Tier-1 city, new property,
#      school score >= 7, transport score >= 8, or 3+ amenities.

# Per-row median Price_per_SqFt of the row's own city.
city_median = df.groupby('City')['Price_per_SqFt'].transform('median')

not_overpriced = df['Price_per_SqFt'] <= city_median * 1.05

has_growth_signal = (
    (df['Is_Tier1_City'] == 1)
    | (df['Is_New_Property'] == 1)
    | (df['School_Density_Score'] >= 7)
    | (df['Transport_Score'] >= 8)
    | (df['Amenities_Count'] >= 3)
)

df['Good_Investment'] = (not_overpriced & has_growth_signal).astype(int)

# Class balance of the new label.
print("\nGood_Investment Label Created!")
print(df['Good_Investment'].value_counts())

good_share_pct = round(df['Good_Investment'].mean() * 100, 1)
print("\nPercentage of Good Investments:",
      good_share_pct, "%")

# Cities contributing the most positively-labelled rows.
print("\nTop cities with most Good Investments:")
good_city_counts = df.loc[df['Good_Investment'] == 1, 'City'].value_counts()
print(good_city_counts.head(10))

In [0]:
# Write the frame (raw + encoded + engineered columns + label) to disk;
# the modelling cell reloads this file.
engineered_csv_path = "./final_with_features_and_target.csv"
df.to_csv(engineered_csv_path, index=False)
print("Saved! Ready for model training →")

In [0]:
# =============================================================================
# STEP 2: FULL EXPLORATORY DATA ANALYSIS (EDA)
# =============================================================================
# This cell draws plots 1-5 (price and size). It uses the pyplot state machine,
# so each plt.* call applies to the most recently created figure/subplot and
# the statement order matters.

# Header: row count, column count, and the share of rows labelled
# Good_Investment == 1, expressed as a percentage.
print(f"Dataset loaded: {df.shape[0]:,} properties, {df.shape[1]} features")
print("Good Investment ratio:", round(df['Good_Investment'].mean()*100, 1), "%")

# =============================================================================
# 1–5: Price & Size Analysis
# =============================================================================

# Section banner, centred in an 80-character line padded with "=".
print("1–5: PRICE & SIZE ANALYSIS".center(80, "="))

# 1. Distribution of property prices (left panel of a 1x2 figure)
plt.figure(figsize=(12,5))
plt.subplot(1,2,1)
sns.histplot(df['Price_in_Lakhs'], bins=100, kde=True, color='skyblue')
plt.title('Distribution of Property Prices (₹ Lakhs)')
plt.xlabel('Price in Lakhs')

# 2. Distribution of property sizes (right panel of the same figure)
plt.subplot(1,2,2)
sns.histplot(df['Size_in_SqFt'], bins=80, kde=True, color='salmon')
plt.title('Distribution of Property Size (SqFt)')
plt.xlabel('Size in SqFt')
plt.tight_layout()
plt.show()

# 3. Price per SqFt by Property Type (one box per property type)
plt.figure(figsize=(10,6))
sns.boxplot(x='Property_Type', y='Price_per_SqFt', data=df, palette='Set2')
plt.title('Price per SqFt by Property Type')
plt.xticks(rotation=45)
plt.show()

# 4. Relationship: Size vs Price, coloured by the Good_Investment label
plt.figure(figsize=(8,6))
sns.scatterplot(x='Size_in_SqFt', y='Price_in_Lakhs', data=df, alpha=0.6, hue='Good_Investment', palette=['red','green'])
plt.title('Property Size vs Price (Green = Good Investment)')
plt.legend(title='Good Investment')
plt.show()

# 5. Outliers in Price per SqFt (left) and Size in SqFt (right)
plt.figure(figsize=(12,4))
plt.subplot(1,2,1)
sns.boxplot(x=df['Price_per_SqFt'])
plt.title('Outliers: Price per SqFt')

plt.subplot(1,2,2)
sns.boxplot(x=df['Size_in_SqFt'])
plt.title('Outliers: Size in SqFt')
plt.show()

# Number of rows priced strictly above the 99th percentile of Price_in_Lakhs.
print("High-end properties (top 1%):", df[df['Price_in_Lakhs'] > df['Price_in_Lakhs'].quantile(0.99)].shape[0])




In [0]:
# =============================================================================
# 6–10: Location-based Analysis
# =============================================================================
# Plots 6-10 aggregate by State, City and Locality. Each figure is created,
# drawn and shown before the next one starts (pyplot state machine).

# Section banner, centred in an 80-character line padded with "=".
print("6–10: LOCATION-BASED ANALYSIS".center(80, "="))

# 6. Avg Price per SqFt by State (10 states with the highest mean)
plt.figure(figsize=(12,6))
state_price = df.groupby('State')['Price_per_SqFt'].mean().sort_values(ascending=False).head(10)
state_price.plot(kind='bar', color='coral')
plt.title('Top 10 States by Avg Price per SqFt')
plt.ylabel('Avg Price per SqFt (₹)')
plt.xticks(rotation=45)
plt.show()

# 7. Avg Property Price by City (Top 15 by mean Price_in_Lakhs)
plt.figure(figsize=(12,6))
city_price = df.groupby('City')['Price_in_Lakhs'].mean().sort_values(ascending=False).head(15)
city_price.plot(kind='bar', color='teal')
plt.title('Top 15 Cities by Average Property Price')
plt.ylabel('Avg Price (₹ Lakhs)')
plt.xticks(rotation=60)
plt.show()

# 8. Median Age by Locality (20 localities with the highest median age)
plt.figure(figsize=(10,6))
df.groupby('Locality')['Age_of_Property'].median().sort_values(ascending=False).head(20).plot(kind='barh', color='purple')
plt.title('Top 20 Oldest Localities (Median Age)')
plt.xlabel('Median Age of Property')
plt.show()

# 9. BHK Distribution Across Top Cities
# "Top" here means the 8 cities with the most rows in the dataset.
top_cities = df['City'].value_counts().head(8).index
plt.figure(figsize=(12,6))
sns.countplot(data=df[df['City'].isin(top_cities)], x='City', hue='BHK', palette='Set3')
plt.title('BHK Distribution Across Top Cities')
plt.xticks(rotation=45)
plt.legend(title='BHK')
plt.show()

# 10. Price per SqFt in the Top 5 Most Expensive Localities
# Localities are ranked by their median Price_per_SqFt; the box plot then shows
# the spread of Price_per_SqFt within each of the five.
top_localities = df.groupby('Locality')['Price_per_SqFt'].median().sort_values(ascending=False).head(5).index
plt.figure(figsize=(10,6))
sns.boxplot(data=df[df['Locality'].isin(top_localities)], x='Locality', y='Price_per_SqFt')
plt.title('Price per SqFt in Top 5 Most Expensive Localities')
plt.xticks(rotation=45)
plt.show()

In [0]:
# =============================================================================
# 11–15: Feature Relationships & Correlation
# =============================================================================
# Plots 11-15: a correlation heatmap over every numeric column, followed by
# scatter and box plots of price against individual features.

# Section banner, centred in an 80-character line padded with "=".
print("11–15: CORRELATION & RELATIONSHIPS".center(80, "="))

# 11. Correlation Heatmap
# numeric_cols covers every numeric column currently in df, which includes the
# *_encoded columns and the engineered features added by earlier cells.
plt.figure(figsize=(14,10))
numeric_cols = df.select_dtypes(include=[np.number]).columns
corr = df[numeric_cols].corr()
sns.heatmap(corr, cmap='coolwarm', center=0, annot=False, square=True)
plt.title('Correlation Matrix of All Numeric Features')
plt.show()

# 12–13. Schools & Hospitals vs Price per SqFt (two panels of one figure)
plt.figure(figsize=(12,5))
plt.subplot(1,2,1)
sns.scatterplot(x='Nearby_Schools', y='Price_per_SqFt', data=df, alpha=0.5)
plt.title('Nearby Schools vs Price per SqFt')

plt.subplot(1,2,2)
sns.scatterplot(x='Nearby_Hospitals', y='Price_per_SqFt', data=df, alpha=0.5, color='orange')
plt.title('Nearby Hospitals vs Price per SqFt')
plt.show()

# 14. Price by Furnished Status
plt.figure(figsize=(8,6))
sns.boxplot(x='Furnished_Status', y='Price_in_Lakhs', data=df)
plt.title('Property Price by Furnishing Status')
plt.show()

# 15. Price per SqFt by Facing Direction
plt.figure(figsize=(10,6))
sns.boxplot(x='Facing', y='Price_per_SqFt', data=df, palette='pastel')
plt.title('Price per SqFt by Property Facing Direction')
plt.xticks(rotation=45)
plt.show()

In [0]:
# =============================================================================
# 16–20: Investment, Amenities & Ownership Analysis
# =============================================================================
# Plots 16-20: ownership/availability breakdowns, parking and amenities versus
# price, and the Good_Investment label versus public-transport access.

print("16–20: INVESTMENT & AMENITIES INSIGHTS".center(80, "="))

# 16. Owner Type Distribution (pie chart with percentage labels)
plt.figure(figsize=(8,5))
df['Owner_Type'].value_counts().plot(kind='pie', autopct='%1.1f%%', startangle=90)
plt.title('Properties by Owner Type')
plt.ylabel('')  # drop the default series-name label pandas puts on pie charts
plt.show()

# 17. Availability Status
plt.figure(figsize=(8,5))
df['Availability_Status'].value_counts().plot(kind='bar', color='gold')
plt.title('Properties by Availability Status')
plt.xticks(rotation=45)
plt.show()

# 18. Parking Space vs Price
plt.figure(figsize=(8,6))
sns.boxplot(x='Parking_Space', y='Price_in_Lakhs', data=df)
plt.title('Does More Parking = Higher Price?')
plt.show()

# 19. Amenities Count vs Price per SqFt
plt.figure(figsize=(8,6))
sns.boxplot(x='Amenities_Count', y='Price_per_SqFt', data=df)
plt.title('More Amenities → Higher Price per SqFt')
plt.show()

# 20. Public Transport vs Good Investment
# Left panel: row counts per transport level, split by the 0/1 label.
plt.figure(figsize=(10,5))
plt.subplot(1,2,1)
sns.countplot(x='Public_Transport_Accessibility', hue='Good_Investment', data=df, palette=['#ff9999','#66b3ff'])
plt.title('Transport Access → Good Investment?')
plt.xticks(rotation=45)

# Right panel: share of Good_Investment rows per transport level.
# The mean of the 0/1 label is a fraction, so it is scaled by 100 to match the
# "%" wording in the title and axis label.
plt.subplot(1,2,2)
transport_investment = (
    df.groupby('Public_Transport_Accessibility')['Good_Investment']
    .mean()
    .mul(100)
    .sort_values(ascending=False)
)
transport_investment.plot(kind='bar', color='green')
plt.title('% of Good Investments by Transport Access')
plt.ylabel('% Good Investment')
plt.xticks(rotation=45)
plt.tight_layout()
plt.show()

print("EDA COMPLETED –  ")

In [0]:
# Save EDA insights summary
# Builds a small dict of headline numbers and writes it to CSV. The write was
# previously commented out while the cell still printed "EDA Summary Saved!".

# Mean price per city, computed once and reused for both extremes.
city_mean_price = df.groupby('City')['Price_in_Lakhs'].mean()

# Most frequent city among rows labelled Good_Investment == 1.
# mode() returns an empty Series when there are no such rows, so guard the lookup.
good_city_mode = df.loc[df['Good_Investment'] == 1, 'City'].mode()
best_city = good_city_mode.iat[0] if len(good_city_mode) else None

insights = {
    "Total Properties": len(df),
    "Good Investment %": round(df['Good_Investment'].mean()*100, 1),
    "Most Expensive City": city_mean_price.idxmax(),
    "Cheapest City": city_mean_price.idxmin(),
    "Best for Investment": best_city
}

pd.Series(insights).to_csv("./eda_summary_insights.csv")
print("EDA Summary Saved!")

In [0]:

# Reload the engineered dataset written by the feature-engineering cells, so
# this modelling section can run from the saved CSV.
df = pd.read_csv("./final_with_features_and_target.csv")

# === CREATE REGRESSION TARGET: Future_Price_5Y ===
# Assumed annual appreciation per city; any city not listed falls back to 7%.
# The target is the current price compounded at that rate for 5 years.
growth_rate = {
    'Mumbai': 0.10, 'Delhi': 0.09, 'Bangalore': 0.09, 'Pune': 0.085,
    'Hyderabad': 0.08, 'Chennai': 0.07, 'Kolkata': 0.06
}
df['growth_rate'] = df['City'].map(growth_rate).fillna(0.07)
df['Future_Price_5Y'] = df['Price_in_Lakhs'] * (1 + df['growth_rate'])**5

print("Targets ready:")
print("Good_Investment distribution:", df['Good_Investment'].value_counts().to_dict())
print("Future_Price_5Y range: ₹", df['Future_Price_5Y'].min().round(1), "→", df['Future_Price_5Y'].max().round(1), "Lakhs")

# Features: every *_encoded column plus the numeric/engineered columns below,
# kept in the order in which they appear in df.
# NOTE(review): Good_Investment is derived from Price_per_SqFt, Is_Tier1_City,
# Is_New_Property, School_Density_Score, Transport_Score and Amenities_Count,
# and Future_Price_5Y is a fixed function of Price_in_Lakhs and City, so the
# models can largely reconstruct both targets from these inputs. Confirm that
# this is intended before trusting the reported scores.
feature_cols = [col for col in df.columns if '_encoded' in col or col in [
    'Size_in_SqFt', 'BHK', 'Age_of_Property', 'Price_per_SqFt',
    'School_Density_Score', 'Amenities_Count', 'Security_Score', 'Transport_Score',
    'Is_Tier1_City', 'Is_New_Property', 'Is_Ready_to_Move'
]]

X = df[feature_cols]
y_class = df['Good_Investment']
y_reg = df['Future_Price_5Y']

# Train-test split.
# Both targets are split in ONE call so they share exactly the same rows as
# X_train / X_test. Previously the regression target was split separately and
# without `stratify`, which selects different rows: the regressors were then
# trained and scored on targets that did not belong to their feature rows.
X_train, X_test, y_train_c, y_test_c, y_train_r, y_test_r = train_test_split(
    X, y_class, y_reg, test_size=0.2, random_state=42, stratify=y_class
)
assert X_train.index.equals(y_train_r.index) and X_test.index.equals(y_test_r.index), \
    "regression targets are not aligned with the feature rows"

# Scale features (fit on the training rows only, then apply to the test rows).
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Save scaler so the app can apply the same transformation.
joblib.dump(scaler, "./scaler.pkl")

# Set MLflow Experiment
mlflow.set_experiment("/Users/rsangramofficial@gmail.com/real_estate_investment_advisor")

print("Starting MLflow Experiments...")


In [0]:
def _log_fitted_model(run_label, estimator):
    """Log `estimator` as the run artifact "model".

    Runs whose label contains "XGBoost" use the MLflow xgboost flavor; every
    other run uses the sklearn flavor.
    """
    flavor = mlflow.xgboost if "XGBoost" in run_label else mlflow.sklearn
    flavor.log_model(estimator, "model")


# === CLASSIFICATION MODELS ===
# One MLflow run per candidate; the candidate with the highest test AUC so far
# is written to ./best_classifier.pkl and logged again as "best_classifier".
models_class = {
    "Logistic_Regression": LogisticRegression(max_iter=1000),
    "RandomForest_Classifier": RandomForestClassifier(n_estimators=300, max_depth=15, random_state=42),
    "XGBoost_Classifier": XGBClassifier(n_estimators=300, max_depth=6, learning_rate=0.1, random_state=42)
}

best_auc = 0
best_model_name = ""

for name, model in models_class.items():
    with mlflow.start_run(run_name=name):
        model.fit(X_train_scaled, y_train_c)
        preds = model.predict(X_test_scaled)
        # Probability of the positive class (column 1).
        proba = model.predict_proba(X_test_scaled)[:, 1]

        # Test-set metrics
        acc = accuracy_score(y_test_c, preds)
        prec = precision_score(y_test_c, preds)
        rec = recall_score(y_test_c, preds)
        auc = roc_auc_score(y_test_c, proba)
        f1 = f1_score(y_test_c, preds)

        # Log to MLflow
        classification_metrics = (
            ("accuracy", acc),
            ("precision", prec),
            ("recall", rec),
            ("auc", auc),
            ("f1", f1),
        )
        for metric_key, metric_value in classification_metrics:
            mlflow.log_metric(metric_key, metric_value)
        mlflow.log_param("model", name)

        _log_fitted_model(name, model)

        # Keep the best classifier seen so far
        if auc > best_auc:
            best_auc, best_model_name = auc, name
            joblib.dump(model, "./best_classifier.pkl")
            mlflow.sklearn.log_model(model, "best_classifier")

        print(f"{name} → AUC: {auc:.4f} | Acc: {acc:.4f}")

# === REGRESSION MODELS ===
# Same pattern: one run per candidate; the highest test R² so far is written
# to ./best_regressor.pkl.
models_reg = {
    "Linear_Regression": LinearRegression(),
    "RandomForest_Regressor": RandomForestRegressor(n_estimators=300, max_depth=20, random_state=42),
    "XGBoost_Regressor": XGBRegressor(n_estimators=300, max_depth=6, learning_rate=0.1, random_state=42)
}

best_r2 = -999
best_reg_name = ""

for name, model in models_reg.items():
    with mlflow.start_run(run_name=name + "_reg"):
        model.fit(X_train_scaled, y_train_r)
        preds = model.predict(X_test_scaled)

        rmse = np.sqrt(mean_squared_error(y_test_r, preds))
        mae = mean_absolute_error(y_test_r, preds)
        r2 = r2_score(y_test_r, preds)

        regression_metrics = (("rmse", rmse), ("mae", mae), ("r2", r2))
        for metric_key, metric_value in regression_metrics:
            mlflow.log_metric(metric_key, metric_value)
        mlflow.log_param("model", name)

        _log_fitted_model(name, model)

        # Keep the best regressor seen so far
        if r2 > best_r2:
            best_r2, best_reg_name = r2, name
            joblib.dump(model, "./best_regressor.pkl")

        print(f"{name} → R²: {r2:.4f} | RMSE: {rmse:.2f}")

print(f"\nBEST CLASSIFIER: {best_model_name} (AUC = {best_auc:.4f})")
print(f"BEST REGRESSOR: {best_reg_name} (R² = {best_r2:.4f})")

In [0]:
# RUN THIS CELL ONLY IF MODELS ARE MISSING
#
# The training cells write their artifacts to the current directory
# (./best_classifier.pkl, ./best_regressor.pkl, ./scaler.pkl,
# ./label_encoders.pkl), while the Streamlit app loads them from the `models`
# folder below. This cell copies whichever of those files exist into that
# folder and reports any that are missing. It previously only created the
# folder and printed a success message without saving anything.
import shutil

models_dir = "/Workspace/Users/rsangramofficial@gmail.com/EDA/Real Estate Investment Advisor: Predicting Property Profitability & Future Value/models"

# Create folder
os.makedirs(models_dir, exist_ok=True)

artifact_names = ["best_classifier.pkl", "best_regressor.pkl", "scaler.pkl", "label_encoders.pkl"]
copied_artifacts = []
missing_artifacts = []

for artifact in artifact_names:
    source_file = os.path.join(".", artifact)
    if os.path.isfile(source_file):
        # copyfile copies file contents only (no permission or metadata copy).
        shutil.copyfile(source_file, os.path.join(models_dir, artifact))
        copied_artifacts.append(artifact)
    else:
        missing_artifacts.append(artifact)

if missing_artifacts:
    print("Copied:", copied_artifacts)
    print("NOT FOUND in the current directory (re-run the training cells):", missing_artifacts)
else:
    print("MODELS RE-SAVED SUCCESSFULLY!")

In [0]:


# Folder the Streamlit app loads its artifacts from.
model_path = "/Workspace/Users/rsangramofficial@gmail.com/EDA/Real Estate Investment Advisor: Predicting Property Profitability & Future Value/models"

# Show what is currently stored there, one entry per line.
print("Files in models folder:")
for entry in os.listdir(model_path):
    print(f"    {entry}")

In [0]:
# Write the Streamlit app to disk so the `streamlit run` cell has a script to
# launch. The app source lives in the `streamlit_code` string and is NOT
# executed in the notebook kernel; it is only written to `app_path`.
# (Earlier, the write was commented out and the app body ran inline here, so
# the script file was never produced.)
#
# Fixes inside the app relative to the inline version:
#   * feature columns are reordered to the order the scaler was fitted on;
#   * the Tier-1 city list matches the one used when building the features;
#   * the school count is converted to the same 2/4/7/10 score as in training;
#   * categorical inputs come from an explicit mapping instead of `locals()`;
#   * growth is printed with its real sign.

app_path = (
    "/Workspace/Users/rsangramofficial@gmail.com/EDA/"
    "Real Estate Investment Advisor: Predicting Property Profitability & Future Value/"
    "streamlit_app_v2.py"
)

streamlit_code = '''
"""Streamlit front end: Good_Investment probability and 5-year price estimate."""
import joblib
import pandas as pd
import streamlit as st

MODEL_DIR = "/Workspace/Users/rsangramofficial@gmail.com/EDA/Real Estate Investment Advisor: Predicting Property Profitability & Future Value/models"

# Same list as the Is_Tier1_City feature used during training.
TIER1_CITIES = ["Mumbai", "Delhi", "Bangalore", "Hyderabad", "Pune", "Chennai", "Kolkata"]

# The form does not ask for a price, so a fixed price per SqFt is assumed.
ASSUMED_PRICE_PER_SQFT = 9000


def school_density_score(n_schools):
    """Convert a nearby-school count to the 2/4/7/10 score used in training."""
    if n_schools <= 1:
        return 2
    if n_schools <= 3:
        return 4
    if n_schools <= 6:
        return 7
    return 10


st.set_page_config(page_title="Real Estate Investment Advisor", layout="wide")
st.title("Real Estate Investment Advisor")
st.markdown("### Predict Good Investment + 5-Year Price")


@st.cache_resource
def load_models():
    """Load the classifier, regressor, scaler and label encoders from MODEL_DIR."""
    clf = joblib.load(f"{MODEL_DIR}/best_classifier.pkl")
    reg = joblib.load(f"{MODEL_DIR}/best_regressor.pkl")
    scaler = joblib.load(f"{MODEL_DIR}/scaler.pkl")
    encoders = joblib.load(f"{MODEL_DIR}/label_encoders.pkl")
    return clf, reg, scaler, encoders


clf, reg, scaler, encoders = load_models()
st.success("Models Loaded!")

with st.sidebar:
    size = st.number_input("Size (SqFt)", 500, 10000, 1500)
    bhk = st.slider("BHK", 1, 6, 3)
    city = st.selectbox("City", ["Mumbai","Delhi","Bangalore","Pune","Hyderabad","Chennai","Kolkata"])
    schools = st.slider("Nearby Schools", 0, 10, 7)
    amenities = st.multiselect("Amenities", ["Gym","Pool","Garden","Lift","Clubhouse","Security"])

if st.button("Analyze Investment", type="primary"):
    data = {
        'Size_in_SqFt': size, 'BHK': bhk,
        'Price_per_SqFt': ASSUMED_PRICE_PER_SQFT, 'Age_of_Property': 5,
        'School_Density_Score': school_density_score(schools),
        'Amenities_Count': len(amenities),
        'Security_Score': 9, 'Transport_Score': 9,
        'Is_Tier1_City': int(city in TIER1_CITIES),
        'Is_New_Property': 1, 'Is_Ready_to_Move': 1
    }

    # Only City is collected from the user; every other categorical column
    # falls back to code 0, as does any value the encoder has never seen.
    user_categoricals = {"City": city}
    for col, enc in encoders.items():
        val = user_categoricals.get(col, "unknown")
        data[col + "_encoded"] = enc.transform([val])[0] if val in enc.classes_ else 0

    X = pd.DataFrame([data])
    # Put the columns in the order the scaler was fitted on; a missing feature
    # raises KeyError here instead of being silently misaligned.
    if hasattr(scaler, "feature_names_in_"):
        X = X[list(scaler.feature_names_in_)]
    X_scaled = scaler.transform(X)

    prob = clf.predict_proba(X_scaled)[0][1]
    future_price = reg.predict(X_scaled)[0]

    col1, col2 = st.columns(2)
    with col1:
        st.metric("Good Investment Chance", f"{prob:.1%}")
        if prob >= 0.7:
            st.success("STRONG BUY")
        elif prob >= 0.5:
            st.info("Good Investment")
        else:
            st.warning("Avoid")

    with col2:
        # Current price in lakhs implied by the assumed price per SqFt.
        current = size * ASSUMED_PRICE_PER_SQFT / 100000
        st.metric("Price in 5 Years", f"₹{future_price:.1f} Lakhs")
        growth = (future_price / current - 1) * 100
        st.metric("Growth", f"{growth:+.1f}%")

    if hasattr(clf, "feature_importances_"):
        st.bar_chart(pd.Series(clf.feature_importances_, index=X.columns))
'''

# utf-8 is required because the app source contains the "₹" character.
os.makedirs(os.path.dirname(app_path), exist_ok=True)
with open(app_path, "w", encoding="utf-8") as f:
    f.write(streamlit_code)

print(f"Streamlit app written to: {app_path}")

In [0]:
# Kill old app + restart fresh
!pkill -f streamlit
!streamlit run "/Workspace/Users/rsangramofficial@gmail.com/EDA/Real Estate Investment Advisor: Predicting Property Profitability & Future Value/streamlit_app_v2.py" --server.port 8501