In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from pathlib import Path
from scipy.stats import spearmanr
from sklearn.feature_selection import mutual_info_classif, SelectKBest, f_classif
from statsmodels.stats.outliers_influence import variance_inflation_factor
import warnings
warnings.filterwarnings('ignore')

# Plot appearance: matplotlib's seaborn-style dark grid plus the "husl" palette.
plt.style.use('seaborn-v0_8-darkgrid')
sns.set_palette("husl")

# Make sure every directory this script writes into exists before any output
# is produced; directories that already exist are left untouched.
for output_dir in ('../../data/processed', '../../outputs', '../../outputs/figures'):
    Path(output_dir).mkdir(parents=True, exist_ok=True)

banner = "=" * 80
print(banner)
print("üìä TASK 1.5: FEATURE SELECTION & CORRELATION ANALYSIS")
print(banner)

# Read the dataset produced by the categorical-encoding step.
df = pd.read_csv('../../data/processed/listings_with_categorical_encoding.csv')
n_rows, n_cols = df.shape
print(f"\n‚úÖ Loaded dataset: {n_rows:,} rows √ó {n_cols} columns")

# Short overview; note that the column count still includes the id and
# target columns at this point.
print("\nDataset Info:")
print(f"   Total features: {n_cols}")
print("   Target variable: value_encoded")

# Prepare Features and Target
print("\n" + "="*80)
print("üîß PREPARING FEATURES FOR ANALYSIS")
print("="*80)

# The encoded value class is the prediction target.
target = 'value_encoded'
y = df[target]

# Columns that must not enter the feature matrix: the row id, both forms of
# the target, the raw categorical columns and the date columns.
exclude_cols = [
    'id', 'value_encoded', 'value_category',
    'property_type', 'room_type', 'neighbourhood_cleansed',
    'host_since', 'first_review', 'last_review'
]

# 'city' is only excluded when the dataset actually carries it.
if 'city' in df.columns:
    exclude_cols.append('city')

# Keep the remaining columns in their original order, then restrict the
# feature matrix to numeric dtypes.
# NOTE(review): host_id is numeric and not excluded, so it is analysed as a
# feature - confirm that an identifier is really wanted here.
feature_cols = df.columns[~df.columns.isin(exclude_cols)].tolist()
X = df[feature_cols].select_dtypes(include='number')

print(f"\n‚úÖ Feature Matrix Prepared:")
print(f"   Total features for analysis: {X.shape[1]}")
print(f"   Target distribution:")
class_labels = ('Poor_Value', 'Fair_Value', 'Excellent_Value')
n_samples = len(y)
for val in sorted(y.unique()):
    count = y.eq(val).sum()
    pct = (count / n_samples) * 100
    # The encoded class value doubles as the position of its readable label.
    label = class_labels[int(val)]
    print(f"      {val} ({label}): {count:,} ({pct:.2f}%)")

# Missing values are replaced with the per-column median, in X only.
missing = X.isna().sum().sum()
if missing == 0:
    print(f"\n‚úÖ No missing values in feature matrix")
else:
    print(f"\n‚ö†Ô∏è Warning: {missing} missing values found. Filling with median...")
    X = X.fillna(X.median())

print(f"\nFeature columns ({len(X.columns)}):")
for position, column_name in enumerate(X.columns, start=1):
    print(f"   {position:2d}. {column_name}")

# Correlation Analysis with Target
print("\n" + "="*80)
print("üìà CORRELATION ANALYSIS WITH TARGET")
print("="*80)

# Signed correlation of every feature with the target. It is computed once and
# reused for both the ranking and the NaN filter further down.
signed_corr = X.corrwith(y)

# Rank by strength only: the sign is dropped before sorting.
correlations = signed_corr.abs().sort_values(ascending=False)

ranking_sections = (
    ("\nüîù Top 30 Features by Correlation with Target:", correlations.head(30)),
    ("\nüîª Bottom 10 Features by Correlation with Target:", correlations.tail(10)),
)
for heading, ranked in ranking_sections:
    print(heading)
    print("-" * 80)
    for i, (feature, corr) in enumerate(ranked.items(), 1):
        print(f"   {i:2d}. {feature:45s} | Correlation: {corr:.4f}")

# Persist the full ranking (features with an undefined correlation included).
corr_df = correlations.rename_axis('feature').reset_index(name='correlation_with_target')
corr_df.to_csv('../../outputs/feature_target_correlations.csv', index=False)
print(f"\nüíæ Saved: outputs/feature_target_correlations.csv")

# Drop every feature whose correlation with the target is NaN.
X_clean = X.loc[:, signed_corr.notna()]
print(f"\n‚úÖ Removed features with NaN correlation. New shape: {X_clean.shape}")

# Re-rank using only the surviving features.
correlations = X_clean.corrwith(y).abs().sort_values(ascending=False)

# Multicollinearity Analysis (VIF)
print("\n" + "="*80)
print("üîç MULTICOLLINEARITY ANALYSIS (VIF)")
print("="*80)

# VIF is only computed for the 40 features most correlated with the target.
top_features = correlations.head(40).index.tolist()
X_vif = X_clean[top_features].copy()

print(f"\n‚è≥ Calculating VIF for top 40 features...")
print(f"(This may take a moment...)")

# variance_inflation_factor() regresses one column on the remaining columns of
# the matrix exactly as given; it does not add an intercept. Without a
# constant column the auxiliary regressions are uncentered and the reported
# VIFs are inflated, so an explicit column of ones is prepended here. The
# constant sits at position 0 and is skipped when collecting the results.
vif_design = np.column_stack([np.ones(len(X_vif)), X_vif.to_numpy(dtype=float)])

vif_data = pd.DataFrame()
vif_data["feature"] = X_vif.columns
vif_data["VIF"] = [variance_inflation_factor(vif_design, col_idx)
                   for col_idx in range(1, vif_design.shape[1])]
vif_data = vif_data.sort_values('VIF', ascending=False)

print(f"\nüìä VIF Results (VIF > 10 indicates high multicollinearity):")
print("-" * 80)
print(f"{'Feature':<45s} | {'VIF':>10s} | {'Status'}")
print("-" * 80)

# Flag features above the VIF > 10 threshold; a NaN VIF is never flagged.
is_high = vif_data['VIF'] > 10
for feature, vif, flagged in zip(vif_data['feature'], vif_data['VIF'], is_high):
    status = "‚ö†Ô∏è HIGH" if flagged else "‚úÖ OK"
    print(f"{feature:<45s} | {vif:>10.2f} | {status}")

# Names of the flagged features, ordered from highest VIF downwards.
high_vif = vif_data.loc[is_high, 'feature'].tolist()

print(f"\nüìå Features with high multicollinearity (VIF > 10): {len(high_vif)}")
for feat in high_vif:
    print(f"   - {feat}")

# Save VIF results
vif_data.to_csv('../../outputs/vif_analysis.csv', index=False)
print(f"\nüíæ Saved: outputs/vif_analysis.csv")

# Feature Importance using Multiple Methods
print("\n" + "="*80)
print("üéØ FEATURE IMPORTANCE ANALYSIS")
print("="*80)


def _min_max_scale(scores):
    """Rescale scores linearly so the smallest maps to 0 and the largest to 1."""
    lowest = scores.min()
    return (scores - lowest) / (scores.max() - lowest)


# Method 1: mutual information between each feature and the target class.
print("\n1Ô∏è‚É£ Calculating Mutual Information...")
mi_scores = mutual_info_classif(X_clean, y, random_state=42)
mi_importance = pd.Series(mi_scores, index=X_clean.columns).sort_values(ascending=False)

# Method 2: ANOVA F-statistic per feature (the p-values are discarded).
print("2Ô∏è‚É£ Calculating ANOVA F-scores...")
f_scores, _ = f_classif(X_clean, y)
f_importance = pd.Series(f_scores, index=X_clean.columns).sort_values(ascending=False)

# Method 3: the absolute correlations computed earlier are reused as-is.
print("3Ô∏è‚É£ Using Correlation scores...")
corr_importance = correlations

# Bring the three methods onto a common 0-1 scale, then average them.
# The series are aligned on feature name when they are added.
print("4Ô∏è‚É£ Combining importance scores...")
mi_norm = _min_max_scale(mi_importance)
f_norm = _min_max_scale(f_importance)
corr_norm = _min_max_scale(corr_importance)

combined_importance = ((mi_norm + f_norm + corr_norm) / 3).sort_values(ascending=False)

print(f"\nüèÜ Top 30 Features by Combined Importance Score:")
print("-" * 100)
print(f"{'Rank':<6s} | {'Feature':<45s} | {'Combined':>10s} | {'MI':>8s} | {'F-Score':>8s} | {'Corr':>8s}")
print("-" * 100)

for rank, (feature, score) in enumerate(combined_importance.head(30).items(), start=1):
    print(f"{rank:<6d} | {feature:<45s} | {score:>10.4f} | "
          f"{mi_norm[feature]:>8.4f} | {f_norm[feature]:>8.4f} | {corr_norm[feature]:>8.4f}")

# Save the normalised score of every method, ordered by combined score.
ranked_index = combined_importance.index
importance_df = pd.DataFrame({
    'feature': ranked_index,
    'combined_score': combined_importance.to_numpy(),
    'mutual_information': mi_norm.reindex(ranked_index).to_numpy(),
    'f_score': f_norm.reindex(ranked_index).to_numpy(),
    'correlation': corr_norm.reindex(ranked_index).to_numpy(),
})
importance_df.to_csv('../../outputs/feature_importance_scores.csv', index=False)
print(f"\nüíæ Saved: outputs/feature_importance_scores.csv")

# Feature Selection Strategy
print("\n" + "="*80)
print("‚úÇÔ∏è FEATURE SELECTION STRATEGY")
print("="*80)

# Strategy: drop high-VIF features unless they rank in the top 10 by combined
# importance, then keep the best remaining features.
# The protected set is built once instead of on every loop iteration.
protected_features = set(combined_importance.head(10).index)
features_to_remove = [feat for feat in high_vif if feat not in protected_features]

print(f"\nüóëÔ∏è Removing {len(features_to_remove)} highly collinear/redundant features:")
vif_by_feature = dict(zip(vif_data['feature'], vif_data['VIF']))
for feat in features_to_remove:
    if feat in combined_importance.index and feat in vif_by_feature:
        print(f"   - {feat} (VIF: {vif_by_feature[feat]:.2f})")

# Remaining candidates, still ordered by combined importance.
removed_lookup = set(features_to_remove)
remaining_features = [feat for feat in combined_importance.index
                      if feat not in removed_lookup]

# Keep at most 28 features (target is 25-30). n_features is set from the
# actual selection so every later count, label and reduction percentage stays
# correct when fewer than 28 candidates remain.
max_selected_features = 28
selected_features = remaining_features[:max_selected_features]
n_features = len(selected_features)

print(f"\n‚úÖ Selected Top {n_features} Features:")
print("-" * 100)
print(f"{'Rank':<6s} | {'Feature':<45s} | {'Combined Score':>15s} | {'Correlation':>12s}")
print("-" * 100)

for i, feature in enumerate(selected_features, 1):
    print(f"{i:<6d} | {feature:<45s} | {combined_importance[feature]:>15.4f} | {correlations[feature]:>12.4f}")

# Save selected features list
selected_features_df = pd.DataFrame({
    'rank': range(1, len(selected_features) + 1),
    'feature': selected_features,
    'combined_score': [combined_importance[feat] for feat in selected_features],
    'correlation': [correlations[feat] for feat in selected_features]
})
selected_features_df.to_csv('../../outputs/selected_features_list.csv', index=False)
print(f"\nüíæ Saved: outputs/selected_features_list.csv")

# Create Final Dataset with Selected Features
print("\n" + "="*80)
print("üì¶ CREATING FINAL DATASET")
print("="*80)

# Create final dataset with selected features + target + id
final_columns = ['id'] + selected_features + ['value_encoded', 'value_category']
df_final = df[final_columns].copy()

# Missing values were median-filled in the feature matrix only, never in df.
# Take the feature values from X_clean so that imputation is carried into the
# saved dataset instead of being silently discarded. Rows align on the shared
# index because X_clean was derived from df.
df_final[selected_features] = X_clean[selected_features]

print(f"\n‚úÖ Final Dataset Shape: {df_final.shape[0]:,} rows x {df_final.shape[1]} columns")
print(f"\nColumns included:")
print(f"   - id (identifier)")
print(f"   - {len(selected_features)} selected features")
print(f"   - value_encoded (target - numeric)")
print(f"   - value_category (target - categorical)")

# Save final dataset
df_final.to_csv('../../data/processed/listings_final_selected_features.csv', index=False)
print(f"\nüíæ Saved: data/processed/listings_final_selected_features.csv")

# Per-feature descriptive statistics, extended with the missing-value count
# and dtype of each selected feature (one row per feature).
print("\nüìä Generating summary statistics...")
selected_frame = df_final[selected_features]
summary_stats = selected_frame.describe().T
summary_stats['missing'] = selected_frame.isnull().sum()
summary_stats['dtype'] = selected_frame.dtypes
summary_stats.to_csv('../../outputs/selected_features_summary_statistics.csv')
print("üíæ Saved: outputs/selected_features_summary_statistics.csv")

# ============================================================================
# VISUALIZATION SECTION
# ============================================================================
print("\n" + "="*80)
print("üìä GENERATING VISUALIZATIONS")
print("="*80)

# VISUALIZATION 1: pairwise correlations among the selected features.
print("\n1Ô∏è‚É£ Creating correlation heatmap for selected features...")
corr_matrix = df_final[selected_features].corr()
# Mask the diagonal and everything above it so each pair is drawn only once.
mask = np.triu(np.ones_like(corr_matrix, dtype=bool))

fig, ax = plt.subplots(figsize=(20, 16))
sns.heatmap(
    corr_matrix,
    mask=mask,
    annot=False,
    cmap='coolwarm',
    center=0,
    square=True,
    linewidths=0.5,
    cbar_kws={"shrink": 0.8},
    ax=ax,
)
ax.set_title('Correlation Heatmap of Selected 28 Features', fontsize=20, fontweight='bold', pad=20)
fig.tight_layout()
fig.savefig('../../outputs/figures/correlation_heatmap_selected_features.png', dpi=300, bbox_inches='tight')
plt.close(fig)
print("   ‚úì Saved: outputs/figures/correlation_heatmap_selected_features.png")

# VISUALIZATION 2: Feature Importance Comparison (4-panel)
print("\n2Ô∏è‚É£ Creating feature importance comparison plot...")
fig, axes = plt.subplots(2, 2, figsize=(20, 16))

# Each panel shows the 20 best features according to one scoring method.
top_n = 20
top_mi = mi_norm.sort_values(ascending=False).head(top_n)
top_f = f_norm.sort_values(ascending=False).head(top_n)
top_corr = corr_norm.sort_values(ascending=False).head(top_n)
top_combined = combined_importance.head(top_n)  # already sorted descending

# (axes, scores, bar colour, x-axis label, title) for the four panels; all of
# them share the same horizontal-bar layout.
panel_specs = (
    (axes[0, 0], top_mi, 'steelblue', 'Normalized Score',
     'Top 20 Features by Mutual Information'),
    (axes[0, 1], top_f, 'coral', 'Normalized Score',
     'Top 20 Features by ANOVA F-Score'),
    (axes[1, 0], top_corr, 'mediumseagreen', 'Normalized Score',
     'Top 20 Features by Correlation'),
    (axes[1, 1], top_combined, 'mediumpurple', 'Combined Score',
     'Top 20 Features by Combined Score'),
)
for panel, scores, bar_color, x_label, panel_title in panel_specs:
    positions = range(len(scores))
    panel.barh(positions, scores.values, color=bar_color)
    panel.set_yticks(positions)
    panel.set_yticklabels(scores.index, fontsize=9)
    panel.set_xlabel(x_label, fontsize=12)
    panel.set_title(panel_title, fontsize=14, fontweight='bold')
    # Put the highest-scoring feature at the top of the panel.
    panel.invert_yaxis()
    panel.grid(axis='x', alpha=0.3)

fig.suptitle('Feature Importance Comparison Across Methods', fontsize=18, fontweight='bold', y=1.00)
fig.tight_layout()
fig.savefig('../../outputs/figures/feature_importance_comparison.png', dpi=300, bbox_inches='tight')
plt.close(fig)
print("   ‚úì Saved: outputs/figures/feature_importance_comparison.png")

# VISUALIZATION 3: VIF Analysis Visualization
print("\n3Ô∏è‚É£ Creating VIF analysis visualization...")


def _vif_bar_color(vif):
    """Return the bar colour for one VIF value: red > 10, orange > 5, else green."""
    if vif > 10:
        return 'red'
    if vif > 5:
        return 'orange'
    return 'green'


fig, ax = plt.subplots(figsize=(16, 10))

colors = [_vif_bar_color(vif) for vif in vif_data['VIF']]
bar_positions = range(len(vif_data))

bars = ax.barh(bar_positions, vif_data['VIF'], color=colors, alpha=0.7)
ax.set_yticks(bar_positions)
ax.set_yticklabels(vif_data['feature'], fontsize=9)
ax.set_xlabel('VIF Score', fontsize=14, fontweight='bold')
ax.set_title('Variance Inflation Factor (VIF) Analysis - Top 40 Features',
             fontsize=16, fontweight='bold', pad=20)

# Reference lines at the two thresholds used for the colour coding.
ax.axvline(x=10, color='red', linestyle='--', linewidth=2, label='High Multicollinearity (VIF > 10)')
ax.axvline(x=5, color='orange', linestyle='--', linewidth=2, label='Moderate Multicollinearity (VIF > 5)')
ax.invert_yaxis()
ax.grid(axis='x', alpha=0.3)
ax.legend(fontsize=12)

# Count of flagged features, boxed in the lower-right corner of the axes.
annotation_box = dict(boxstyle='round', facecolor='wheat', alpha=0.5)
ax.text(0.98, 0.02, f'Features with VIF > 10: {len(high_vif)}',
        transform=ax.transAxes, fontsize=12, verticalalignment='bottom',
        horizontalalignment='right', bbox=annotation_box)

fig.tight_layout()
fig.savefig('../../outputs/figures/vif_analysis_visualization.png', dpi=300, bbox_inches='tight')
plt.close(fig)
print("   ‚úì Saved: outputs/figures/vif_analysis_visualization.png")

# VISUALIZATION 4: Feature Selection Summary (4-panel)
print("\n4Ô∏è‚É£ Creating feature selection summary plot...")


def _label_bar_heights(axis, bar_container, values, fontsize):
    """Write each value centred just above the top of its bar."""
    for bar, value in zip(bar_container, values):
        axis.text(bar.get_x() + bar.get_width()/2., bar.get_height(),
                  f'{int(value)}', ha='center', va='bottom',
                  fontsize=fontsize, fontweight='bold')


fig = plt.figure(figsize=(20, 14))
gs = fig.add_gridspec(2, 2, hspace=0.3, wspace=0.3)

# Panel 1: Selected Features Correlation with Target
ax1 = fig.add_subplot(gs[0, 0])
selected_corrs = [correlations[feat] for feat in selected_features]
ax1.barh(range(len(selected_features)), selected_corrs, color='steelblue', alpha=0.7)
ax1.set_yticks(range(len(selected_features)))
ax1.set_yticklabels(selected_features, fontsize=8)
ax1.set_xlabel('Absolute Correlation with Target', fontsize=12)
ax1.set_title('Selected 28 Features - Correlation with Target', fontsize=14, fontweight='bold')
ax1.invert_yaxis()
ax1.grid(axis='x', alpha=0.3)

# Panel 2: number of features left after each stage of the selection process
ax2 = fig.add_subplot(gs[0, 1])
stages = ['Original\nFeatures', 'After\nCleaning', 'After VIF\nAnalysis', 'Final\nSelected']
counts = [X.shape[1], X_clean.shape[1], len(remaining_features), n_features]
colors_bar = ['#3498db', '#2ecc71', '#f39c12', '#e74c3c']
bars = ax2.bar(stages, counts, color=colors_bar, alpha=0.7, edgecolor='black', linewidth=2)
ax2.set_ylabel('Number of Features', fontsize=12, fontweight='bold')
ax2.set_title('Feature Selection Process', fontsize=14, fontweight='bold')
ax2.grid(axis='y', alpha=0.3)
_label_bar_heights(ax2, bars, counts, fontsize=14)

# Panel 3: Distribution of Combined Importance Scores
ax3 = fig.add_subplot(gs[1, 0])
ax3.hist(combined_importance.values, bins=30, color='mediumpurple', alpha=0.7, edgecolor='black')
# The score of the last selected feature acts as the effective cut-off.
selection_threshold = combined_importance[selected_features[-1]]
ax3.axvline(selection_threshold, color='red', linestyle='--',
            linewidth=2, label=f'Selection Threshold ({selection_threshold:.3f})')
ax3.set_xlabel('Combined Importance Score', fontsize=12)
ax3.set_ylabel('Frequency', fontsize=12)
ax3.set_title('Distribution of Combined Importance Scores', fontsize=14, fontweight='bold')
ax3.legend(fontsize=10)
ax3.grid(alpha=0.3)

# Panel 4: VIF Distribution
ax4 = fig.add_subplot(gs[1, 1])
# The last bin is open-ended. Using "max VIF + 1" as the upper edge produced
# non-increasing bin edges (and a ValueError from pd.cut) whenever the largest
# VIF was below 199.
vif_bins = [0, 5, 10, 50, 200, np.inf]
vif_labels = ['0-5\n(Low)', '5-10\n(Moderate)', '10-50\n(High)', '50-200\n(Very High)', '>200\n(Extreme)']
vif_counts = pd.cut(vif_data['VIF'], bins=vif_bins, labels=vif_labels).value_counts().sort_index()
colors_vif = ['green', 'yellowgreen', 'orange', 'orangered', 'red']
bars_vif = ax4.bar(range(len(vif_counts)), vif_counts.values, color=colors_vif, alpha=0.7,
                   edgecolor='black', linewidth=2)
ax4.set_xticks(range(len(vif_counts)))
ax4.set_xticklabels(vif_labels, fontsize=10)
ax4.set_ylabel('Number of Features', fontsize=12, fontweight='bold')
ax4.set_title('VIF Distribution (Top 40 Features)', fontsize=14, fontweight='bold')
ax4.grid(axis='y', alpha=0.3)
_label_bar_heights(ax4, bars_vif, vif_counts.values, fontsize=12)

plt.suptitle('Feature Selection Summary Dashboard', fontsize=18, fontweight='bold', y=0.995)
plt.savefig('../../outputs/figures/feature_selection_summary.png', dpi=300, bbox_inches='tight')
plt.close()
print("   ‚úì Saved: outputs/figures/feature_selection_summary.png")

print("\n‚úÖ All 4 visualizations generated successfully!")

# ============================================================================
# FINAL SUMMARY
# ============================================================================
heavy_rule = "=" * 80
light_rule = "-" * 80

print("\n" + heavy_rule)
print("‚úÖ TASK 1.5: FEATURE SELECTION & CORRELATION ANALYSIS - COMPLETE")
print(heavy_rule)

# Feature counts at each stage of the pipeline.
n_original = X.shape[1]
reduction_pct = (n_original - n_features) / n_original * 100

print("\nüìã SUMMARY REPORT")
print(light_rule)
print(f"Original features analyzed: {n_original}")
print(f"Features after removing NaN correlations: {X_clean.shape[1]}")
print(f"Features analyzed for VIF: 40")
print(f"Features with high multicollinearity (VIF > 10): {len(high_vif)}")
print(f"Features removed due to multicollinearity: {len(features_to_remove)}")
print(f"Final selected features: {n_features}")
print(f"Reduction: {reduction_pct:.1f}%")

# Absolute correlation with the target, for the selected features only.
print(f"\nüéØ KEY STATISTICS")
print(light_rule)
selected_corrs = [correlations[feat] for feat in selected_features]
print(f"Average correlation with target: {np.mean(selected_corrs):.4f}")
print(f"Median correlation with target: {np.median(selected_corrs):.4f}")
print(f"Max correlation with target: {max(selected_corrs):.4f}")
print(f"Min correlation with target: {min(selected_corrs):.4f}")

# Everything written by this script, grouped by destination directory.
generated_files = (
    ("Data Files (data/processed/):", (
        "listings_final_selected_features.csv",
    )),
    ("\nAnalysis Files (outputs/):", (
        "feature_target_correlations.csv",
        "vif_analysis.csv",
        "feature_importance_scores.csv",
        "selected_features_list.csv",
        "selected_features_summary_statistics.csv",
    )),
    ("\nVisualization Files (outputs/figures/):", (
        "correlation_heatmap_selected_features.png",
        "feature_importance_comparison.png",
        "vif_analysis_visualization.png",
        "feature_selection_summary.png",
    )),
)
print(f"\nüìÅ FILES GENERATED")
print(light_rule)
for group_heading, file_names in generated_files:
    print(group_heading)
    for file_name in file_names:
        print(f"   ‚úì {file_name}")

print(f"\n‚úÖ DATA QUALITY CHECK")
print(light_rule)
missing_final = df_final[selected_features].isnull().sum().sum()
print(f"   Missing values in selected features: {missing_final}")
for check_line in (
    "All features validated across 3 importance methods",
    "High multicollinearity features removed",
    "Dataset ready for model training",
):
    print(f"   {check_line}: ‚úì")

next_steps = (
    "Train/test split (80-20)",
    "Feature scaling/normalization (StandardScaler)",
    "Save processed datasets (X_train, X_test, y_train, y_test)",
    "Ready for Week 2 model training",
)
print(f"\nüöÄ NEXT STEPS (Task 1.6)")
print(light_rule)
for step_number, step_text in enumerate(next_steps, start=1):
    print(f"   {step_number}. {step_text}")

print("\n" + heavy_rule)
print("‚ú® TASK 1.5 SUCCESSFULLY COMPLETED! ‚ú®")
print(heavy_rule)

📊 TASK 1.5: FEATURE SELECTION & CORRELATION ANALYSIS

✅ Loaded dataset: 19,912 rows × 91 columns

Dataset Info:
   Total features: 91
   Target variable: value_encoded

🔧 PREPARING FEATURES FOR ANALYSIS

✅ Feature Matrix Prepared:
   Total features for analysis: 73
   Target distribution:
      0 (Poor_Value): 6,571 (33.00%)
      1 (Fair_Value): 6,773 (34.01%)
      2 (Excellent_Value): 6,568 (32.99%)

✅ No missing values in feature matrix

Feature columns (73):
    1. host_id
    2. host_response_rate
    3. host_acceptance_rate
    4. host_is_superhost
    5. host_listings_count
    6. host_total_listings_count
    7. host_has_profile_pic
    8. host_identity_verified
    9. latitude
   10. longitude
   11. accommodates
   12. bedrooms
   13. beds
   14. price
   15. minimum_nights
   16. maximum_nights
   17. minimum_minimum_nights
   18. maximum_minimum_nights
   19. minimum_maximum_nights
   20. maximum_maximum_nights
   21. minimum_nights_avg_ntm
   22. maximum_nigh