<a href="https://colab.research.google.com/github/Swetha0713/AIML/blob/main/2303A52415_25_B33.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [2]:
import pandas as pd
from sklearn.cluster import KMeans
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report

# Read the buddymove_holidayiq CSV from the Colab working directory.
data = pd.read_csv("/content/buddymove_holidayiq.csv")

# 1. Rank every attribute by its mean value and keep the five highest.
# The first column is skipped by position; the correlation step further down
# drops 'User Id' by name, so that is presumably the column excluded here.
attribute_scores = data.iloc[:, 1:]
attribute_means = attribute_scores.mean().sort_values(ascending=False)
top_5_attributes = attribute_means.iloc[:5]
print("Top 5 attributes for South Indian destinations:", top_5_attributes, sep="\n")

# 2. The attribute with the highest mean is reported as the most liked one.
most_liked_attribute = attribute_means.idxmax()
print(f"\nMost liked travel attribute: {most_liked_attribute}")

# 3. Report both ends of the mean ranking.
# max_attribute repeats the same idxmax() lookup as most_liked_attribute above.
max_attribute, min_attribute = attribute_means.idxmax(), attribute_means.idxmin()
print(f"\nMax attribute: {max_attribute}")
print(f"Min attribute: {min_attribute}")

# 4. Role of beaches, theatres, malls, and parks in South India tourism.
# Only "Theatre" and "Shopping" are looked up below.
# NOTE(review): the dataset appears to have no beach or park column - confirm
# whether another column (e.g. "Picnic") was meant to stand in for them.
specific_attributes = ["Theatre", "Shopping"]
print("\nRole of specific attributes:")
for name in specific_attributes:
    mean_score = attribute_means[name]
    print(f"{name}: {mean_score} (average score)")

# 5. Identify the sports with most attributes.
# "Sports" is treated as one attribute column and correlated against the
# others. 'User Id' is removed first so only the attribute columns take part.
# The result keeps the Sports-vs-Sports entry (always 1.0), which therefore
# sorts to the top of the printed list.
attribute_columns = data.drop(columns=['User Id'])
correlation_matrix = attribute_columns.corr()
sports_correlation = correlation_matrix["Sports"].sort_values(ascending=False)
print("\nSports attribute correlations:", sports_correlation, sep="\n")

# 6. Apply either classification or clustering model to evaluate the dataset

# Preprocessing for clustering.
# The feature columns are selected by name instead of by position: "Cluster"
# is written back into `data` below, so a positional `iloc[:, 1:]` would feed
# the previous cluster labels into the scaler whenever this code is re-run
# without reloading the CSV. 'User Id' must exist; "Cluster" may or may not.
cluster_features = data.drop(columns=["User Id"]).drop(columns=["Cluster"], errors="ignore")
scaler = StandardScaler()
scaled_data = scaler.fit_transform(cluster_features)

# Clustering using KMeans.
# n_init is passed explicitly because scikit-learn's default differs between
# releases (and omitting it triggers a FutureWarning on 1.2/1.3); pinning it
# keeps the clustering reproducible regardless of the installed version.
kmeans = KMeans(n_clusters=3, n_init=10, random_state=42)
data["Cluster"] = kmeans.fit_predict(scaled_data)

# The centers are expressed in standardized units because the model was fit on
# the scaled matrix, not on the raw attribute values.
print("\nCluster centers:")
print(kmeans.cluster_centers_)

# Classification example: predict the cluster label from the attributes.
# NOTE: the "Cluster" target was produced by KMeans from these same attribute
# columns, so the report below measures how well the forest reproduces the
# clustering rather than performance on an independently labelled task.
#
# Features are selected by name. The previous positional slice `iloc[:, 1:-1]`
# was only correct while "Cluster" happened to be the last column of `data`.
X = data.drop(columns=["User Id", "Cluster"])  # Features
y = data["Cluster"]                            # Target (clusters)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Train a RandomForestClassifier (seeded so the fitted forest is repeatable).
clf = RandomForestClassifier(random_state=42)
clf.fit(X_train, y_train)

# Evaluate the classifier on the held-out 20% split.
y_pred = clf.predict(X_test)
print("\nClassification Report:")
print(classification_report(y_test, y_pred))

Top 5 attributes for South Indian destinations:
Nature       124.518072
Picnic       120.401606
Theatre      116.377510
Shopping     112.638554
Religious    109.779116
dtype: float64

Most liked travel attribute: Nature

Max attribute: Nature
Min attribute: Sports

Role of specific attributes:
Theatre: 116.37751004016064 (average score)
Shopping: 112.63855421686748 (average score)

Sports attribute correlations:
Sports       1.000000
Picnic       0.797777
Religious    0.623400
Theatre      0.611728
Nature       0.608372
Shopping     0.583956
Name: Sports, dtype: float64

Cluster centers:
[[ 0.73986327 -0.10300074  1.00745058  0.82308075 -0.14356567  0.47312211]
 [ 0.84827553  1.41327686 -0.32501768  0.08400616  1.40829171  0.89676263]
 [-0.92940902 -0.62286356 -0.53848777 -0.6116467  -0.59230393 -0.7683803 ]]

Classification Report:
              precision    recall  f1-score   support

           0       0.89      0.94      0.91        17
           1       0.91      0.83      0.87   