In [None]:
# Mount Google Drive at /content/drive; later cells read the dataset archive,
# CSV files and test image from it and write the trained weights back to it.
from google.colab import drive
drive.mount('/content/drive')

In [None]:
!unzip "/content/drive/MyDrive/archive_dataset.zip" -d "/content/dataset"

In [None]:
# List the entries of the extracted training directory (the same path is used
# as the ImageFolder root for the training split).
import os
os.listdir('/content/dataset/Landscape Classification/Landscape Classification/Training Data')

In [None]:
import torch
import torchvision
import torchvision.transforms as transforms
from torchvision import datasets, models
from torch.utils.data import DataLoader, random_split
import os

In [None]:
# Preprocessing applied to every image loaded through ImageFolder.
preprocessing_steps = [
    # Resize to the 224x224 input size used with ResNet50.
    transforms.Resize((224, 224)),
    # Convert images to tensors.
    transforms.ToTensor(),
    # Per-channel normalization constants for ResNet.
    transforms.Normalize(
        mean=[0.485, 0.456, 0.406],
        std=[0.229, 0.224, 0.225],
    ),
]
transform = transforms.Compose(preprocessing_steps)

In [None]:
# Root folders of the training and validation splits.
dataset_path = "/content/dataset/Landscape Classification/Landscape Classification/Training Data"
dataset_val_path = "/content/dataset/Landscape Classification/Landscape Classification/Validation Data"

# Both splits share the same preprocessing pipeline.
dataset = datasets.ImageFolder(root=dataset_path, transform=transform)
dataset_val = datasets.ImageFolder(root=dataset_val_path, transform=transform)

# Check class names
print(dataset.classes)
print(dataset_val.classes)

# ImageFolder assigns label indices independently for each root, so the two
# class lists must be identical; otherwise validation labels would not refer to
# the same classes the model was trained on.
assert dataset.classes == dataset_val.classes, (
    "Training and validation folders define different classes; "
    "label indices would not line up."
)

In [None]:
# Number of samples in each split (the splits come from separate folders).
train_size, val_size = len(dataset), len(dataset_val)

# Batches of 32 images; only the training split is shuffled.
train_loader = DataLoader(dataset, shuffle=True, batch_size=32)
val_loader = DataLoader(dataset_val, shuffle=False, batch_size=32)

In [None]:
# Load ResNet50 with ImageNet weights. The `pretrained` flag is deprecated in
# torchvision in favour of `weights=`; IMAGENET1K_V1 is the checkpoint that
# `pretrained=True` selected, so the starting weights are unchanged.
model = models.resnet50(weights=models.ResNet50_Weights.IMAGENET1K_V1)

# Modify the final fully connected layer for our number of classes
num_features = model.fc.in_features
num_classes = len(dataset.classes)  # Number of labels in your dataset
model.fc = torch.nn.Linear(num_features, num_classes)

# Move to GPU if available
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = model.to(device)

In [None]:
# Cross-entropy loss between the model outputs and the integer class labels.
criterion = torch.nn.CrossEntropyLoss()
# Adam optimises every parameter returned by model.parameters() (the whole
# network, not only the replaced fc layer) with learning rate 0.001.
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

In [None]:
num_epochs = 20

for epoch in range(num_epochs):
    # Switch to training mode at the start of every epoch.
    model.train()
    running_loss = 0.0

    for images, labels in train_loader:
        # Move the batch to the same device as the model.
        images = images.to(device)
        labels = labels.to(device)

        # One optimisation step: clear gradients, forward, loss, backward, update.
        optimizer.zero_grad()
        loss = criterion(model(images), labels)
        loss.backward()
        optimizer.step()

        # Accumulate the scalar batch loss for the epoch average.
        running_loss += loss.item()

    # Report the mean per-batch loss of this epoch.
    print(f"Epoch {epoch+1}/{num_epochs}, Loss: {running_loss/len(train_loader)}")

In [None]:
# Measure top-1 accuracy on the validation loader.
model.eval()
correct, total = 0, 0

with torch.no_grad():  # gradients are not needed while scoring
    for images, labels in val_loader:
        images = images.to(device)
        labels = labels.to(device)
        # Index of the largest output per sample is the predicted class.
        predicted = model(images).max(dim=1).indices
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

accuracy = 100 * correct / total
print(f'Validation Accuracy: {accuracy:.2f}%')

In [None]:
# Persist only the model's state_dict (its parameters and buffers) to Drive.
torch.save(model.state_dict(), "/content/drive/MyDrive/resnet50_model.pth")

In [None]:
# Restore the saved weights. map_location remaps the stored tensors onto the
# current device, so a checkpoint written on a GPU runtime still loads on a
# CPU-only runtime instead of failing during deserialisation.
model.load_state_dict(
    torch.load("/content/drive/MyDrive/resnet50_model.pth", map_location=device)
)
# Switch to inference mode before predicting.
model.eval()

In [None]:
from PIL import Image
import matplotlib.pyplot as plt

# Load an image from local storage
image_path = "/content/drive/MyDrive/glacierimage.webp"  # Replace with your test image path
# Force 3-channel RGB: WebP/PNG files can carry an alpha channel or a palette,
# which would not match the 3-channel Normalize statistics or the model input.
image = Image.open(image_path).convert("RGB")

# Apply the same transformations used during training
image_transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
])

# Convert the image
input_tensor = image_transform(image).unsqueeze(0)  # Add batch dimension

# Move to GPU if available
input_tensor = input_tensor.to(device)

# Predict the class
model.eval()
with torch.no_grad():
    output = model(input_tensor)
    # Arg-max over the class dimension of the single-image batch.
    predicted_class = torch.argmax(output, dim=1).item()

# Get class labels
class_labels = dataset.classes
predicted_label = class_labels[predicted_class]

# Show result
plt.imshow(image)
plt.title(f"Predicted Label: {predicted_label}")
plt.axis("off")
plt.show()


In [None]:
import os
import numpy as np
import pandas as pd

import seaborn as sns
import plotly.express as px
import matplotlib.pyplot as plt
%matplotlib inline

from sklearn.cluster import KMeans
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
from sklearn.manifold import TSNE
from sklearn.decomposition import PCA
from sklearn.metrics import euclidean_distances
from scipy.spatial.distance import cdist

import warnings
# Suppress every warning for the rest of the session. This also hides
# deprecation notices raised by the plotting and sklearn calls in later cells.
warnings.filterwarnings("ignore")

In [None]:
# Load the three CSV tables from Drive: the main table plus the by-genre and
# by-year tables (named after their files).
data = pd.read_csv("/content/drive/MyDrive/data.csv")
genre_data = pd.read_csv('/content/drive/MyDrive/data_by_genres.csv')
year_data = pd.read_csv('/content/drive/MyDrive/data_by_year.csv')

In [None]:
# DataFrame.info() prints its summary itself and returns None, so call it
# directly; wrapping it in print() appended a stray "None" to the output.
data.info()

In [None]:
# DataFrame.info() prints its summary itself and returns None, so call it
# directly; wrapping it in print() appended a stray "None" to the output.
genre_data.info()

In [None]:
# DataFrame.info() prints its summary itself and returns None, so call it
# directly; wrapping it in print() appended a stray "None" to the output.
year_data.info()

In [None]:
from yellowbrick.target import FeatureCorrelation

# Columns whose correlation with 'popularity' is visualised.
feature_names = [
    'acousticness', 'danceability', 'energy', 'instrumentalness',
    'liveness', 'loudness', 'speechiness', 'tempo', 'valence',
    'duration_ms', 'explicit', 'key', 'mode', 'year',
]

# Feature matrix and target taken from the track table.
X = data[feature_names]
y = data['popularity']

# Labels shown by the visualizer, as an array of the feature names.
features = np.array(feature_names)

# Build the visualizer, enlarge the figure, then fit and render it.
visualizer = FeatureCorrelation(labels=features)

plt.rcParams['figure.figsize'] = (20, 20)
visualizer.fit(X, y)
visualizer.show()

In [None]:
def get_decade(year):
    """Return the decade label for a year, e.g. 1991 -> '1990s'.

    The year is divided by ten, truncated to an integer and multiplied back by
    ten; the result is formatted with a trailing 's'.
    """
    decade_start = 10 * int(year / 10)
    return '{}s'.format(decade_start)

# Label every track with its decade (e.g. '1990s').
data['decade'] = data['year'].apply(get_decade)

sns.set(rc={'figure.figsize':(11 ,6)})
# Pass the column as `x=`: current seaborn releases treat the first positional
# argument as `data`, so a bare Series is no longer read as the x variable.
sns.countplot(x=data['decade'])

In [None]:
# Line chart of the selected audio-feature columns of year_data against 'year'.
sound_features = ['acousticness', 'danceability', 'energy', 'instrumentalness', 'liveness', 'valence']
fig = px.line(year_data, x='year', y=sound_features)
fig.show()

In [None]:
# Keep the ten rows of genre_data with the largest 'popularity' values.
top10_genres = genre_data.nlargest(10, 'popularity')

# Grouped bars: one group per genre, one bar per listed audio feature.
fig = px.bar(top10_genres, x='genres', y=['valence', 'energy', 'danceability', 'acousticness'], barmode='group')
fig.show()

In [None]:
from sklearn.cluster import KMeans
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline

# Cluster genres on their standardised numeric columns.
# random_state pins the KMeans initialisation so cluster ids are reproducible
# across kernel restarts.
cluster_pipeline = Pipeline([('scaler', StandardScaler()),
                             ('kmeans', KMeans(n_clusters=10, random_state=42))])
# Drop a 'cluster' column left over from a previous run of this cell so that
# old labels are never fed back in as a feature (keeps the cell idempotent).
X = genre_data.select_dtypes(np.number).drop(columns=['cluster'], errors='ignore')
cluster_pipeline.fit(X)
genre_data['cluster'] = cluster_pipeline.predict(X)

In [None]:
# Visualizing the Clusters with t-SNE

from sklearn.manifold import TSNE

# Embed the standardised feature matrix X in two dimensions with t-SNE.
# random_state makes the stochastic embedding reproducible between runs.
tsne_pipeline = Pipeline([('scaler', StandardScaler()),
                          ('tsne', TSNE(n_components=2, verbose=1, random_state=42))])
genre_embedding = tsne_pipeline.fit_transform(X)

# Attach genre names and cluster ids to the 2-D coordinates for plotting.
projection = pd.DataFrame(columns=['x', 'y'], data=genre_embedding)
projection['genres'] = genre_data['genres']
projection['cluster'] = genre_data['cluster']

fig = px.scatter(
    projection, x='x', y='y', color='cluster', hover_data=['x', 'y', 'genres'])
fig.show()

In [None]:
# Cluster songs on their standardised numeric columns.
# random_state pins the KMeans initialisation so labels are reproducible.
song_cluster_pipeline = Pipeline([('scaler', StandardScaler()),
                                  ('kmeans', KMeans(n_clusters=20,
                                                    verbose=False,
                                                    random_state=42))
                                 ], verbose=False)

# Exclude a 'cluster_label' column left over from a previous run of this cell:
# otherwise the scaler would be fitted with one extra feature and could no
# longer transform the feature columns passed to it by recommend_songs.
X = data.select_dtypes(np.number).drop(columns=['cluster_label'], errors='ignore')
number_cols = list(X.columns)
song_cluster_pipeline.fit(X)
song_cluster_labels = song_cluster_pipeline.predict(X)
data['cluster_label'] = song_cluster_labels

In [None]:
# Visualizing the Clusters with PCA

from sklearn.decomposition import PCA

# Standardise X (the numeric song table assigned in the song-clustering cell)
# and reduce it to two principal components.
pca_pipeline = Pipeline([('scaler', StandardScaler()), ('PCA', PCA(n_components=2))])
song_embedding = pca_pipeline.fit_transform(X)
# Attach song titles and cluster labels to the 2-D coordinates for plotting.
projection = pd.DataFrame(columns=['x', 'y'], data=song_embedding)
projection['title'] = data['name']
projection['cluster'] = data['cluster_label']

# Scatter plot of the projection, coloured by cluster label.
fig = px.scatter(
    projection, x='x', y='y', color='cluster', hover_data=['x', 'y', 'title'])
fig.show()

In [None]:
!pip install spotipy

In [None]:
# spotipy is installed by the dedicated install cell above, so it is not
# reinstalled here.
import spotipy
from spotipy.oauth2 import SpotifyClientCredentials
from collections import defaultdict
import os


# Spotify API credentials are read from the environment so that secrets never
# live in the notebook source. Provide SPOTIPY_CLIENT_ID and
# SPOTIPY_CLIENT_SECRET before running this cell (for example from a secrets
# store or an interactive prompt that writes to os.environ).
# NOTE: credentials that were previously hard-coded in this cell should be
# treated as leaked and rotated in the Spotify developer dashboard.
client_id = os.environ.get('SPOTIPY_CLIENT_ID')
client_secret = os.environ.get('SPOTIPY_CLIENT_SECRET')
# Check if the environment variables are set
if not client_id or not client_secret:
    raise ValueError("Spotify API credentials not found in environment variables. Please set SPOTIPY_CLIENT_ID and SPOTIPY_CLIENT_SECRET.")

# Client authenticated with the client-credentials flow.
sp = spotipy.Spotify(auth_manager=SpotifyClientCredentials(client_id=client_id,
                                                           client_secret=client_secret))

def find_song(name, year):
    """Look a song up on Spotify and return its features as a one-row DataFrame.

    Args:
        name: Track title to search for.
        year: Release year used in the search query.

    Returns:
        A pandas DataFrame with a single row holding 'name', 'year',
        'explicit', 'duration_ms', 'popularity' and every key of the track's
        audio-features record, or None when the search finds no track or no
        audio features can be obtained for it.
    """
    # NOTE(review): the query keeps a space after each field name
    # ('track: ...'); confirm against the Spotify search syntax that this is
    # applied as a field filter rather than as plain keywords.
    results = sp.search(q= 'track: {} year: {}'.format(name,year), limit=1)
    items = results['tracks']['items']
    if not items:
        return None

    track = items[0]
    track_id = track['id']

    # Handle potential errors when fetching audio features
    try:
        audio_features = sp.audio_features(track_id)[0]
    except spotipy.exceptions.SpotifyException as e:
        print(f"Error fetching audio features for track {track_id}: {e}")
        return None

    # The audio-features entry can be None when Spotify has no analysis for
    # the track; iterating it would raise AttributeError.
    if audio_features is None:
        print(f"No audio features available for track {track_id}")
        return None

    song_data = {
        'name': [name],
        'year': [year],
        'explicit': [int(track['explicit'])],
        'duration_ms': [track['duration_ms']],
        'popularity': [track['popularity']],
    }

    # Scalar audio-feature values are broadcast over the single row.
    for key, value in audio_features.items():
        song_data[key] = value

    return pd.DataFrame(song_data)

In [None]:
from collections import defaultdict
from sklearn.metrics import euclidean_distances
from scipy.spatial.distance import cdist
import difflib

# Numeric feature columns used to build song vectors and to select the columns
# handed to the fitted scaler when recommending songs.
number_cols = [
    'valence', 'year', 'acousticness', 'danceability', 'duration_ms',
    'energy', 'explicit', 'instrumentalness', 'key', 'liveness',
    'loudness', 'mode', 'popularity', 'speechiness', 'tempo',
]


def get_song_data(song, spotify_data):
    """Fetch the record for a song from the dataset, falling back to Spotify.

    Args:
        song: Mapping with 'name' and 'year' keys.
        spotify_data: DataFrame with at least 'name' and 'year' columns.

    Returns:
        The first row of ``spotify_data`` matching both name and year (a
        Series); when no row matches, whatever ``find_song`` returns.
    """
    same_name = spotify_data['name'] == song['name']
    same_year = spotify_data['year'] == song['year']
    matches = spotify_data[same_name & same_year]

    # No local match: defer to the Spotify lookup.
    if len(matches) == 0:
        return find_song(song['name'], song['year'])

    return matches.iloc[0]
        

def get_mean_vector(song_list, spotify_data):
    """Average the numeric feature vectors of the given songs.

    Args:
        song_list: Iterable of mappings with 'name' and 'year' keys.
        spotify_data: DataFrame searched by ``get_song_data``.

    Returns:
        A 1-D float array with one entry per column in ``number_cols``: the
        mean over all songs that could be resolved.

    Raises:
        ValueError: If none of the songs could be resolved, since no
            meaningful mean exists in that case.
    """
    song_vectors = []

    for song in song_list:
        song_data = get_song_data(song, spotify_data)
        if song_data is None:
            print('Warning: {} does not exist in Spotify or in database'.format(song['name']))
            continue
        # get_song_data yields a Series for dataset hits but a one-row
        # DataFrame for Spotify lookups; flatten both to a 1-D float vector
        # so the rows stack into a regular matrix.
        song_vector = np.asarray(song_data[number_cols].values, dtype=float).ravel()
        song_vectors.append(song_vector)

    if not song_vectors:
        raise ValueError('None of the given songs exist in Spotify or in database')

    song_matrix = np.array(song_vectors)
    return np.mean(song_matrix, axis=0)


def flatten_dict_list(dict_list):
    """Merge a list of dicts into one mapping from key to list of values.

    Values are collected in the order the dictionaries appear. Keys missing
    from some dictionaries simply contribute fewer values, and an empty input
    yields an empty mapping.

    Args:
        dict_list: Sequence of dictionaries.

    Returns:
        A ``defaultdict`` mapping every key seen to the list of its values.
        Its default factory is cleared before returning, so looking up an
        unknown key raises KeyError like a plain dict.
    """
    flattened_dict = defaultdict(list)

    for dictionary in dict_list:
        for key, value in dictionary.items():
            flattened_dict[key].append(value)

    # Disable auto-creation of keys for callers.
    flattened_dict.default_factory = None
    return flattened_dict


def recommend_songs( song_list, spotify_data, n_songs=10):
    """Recommend the songs closest to the mean feature vector of the seeds.

    Args:
        song_list: List of mappings with 'name' and 'year' keys (seed songs).
        spotify_data: DataFrame holding the ``number_cols`` feature columns
            and the 'name', 'year' and 'artists' metadata columns.
        n_songs: Maximum number of recommendations to return.

    Returns:
        A list of dicts with 'name', 'year' and 'artists', ordered by
        increasing cosine distance to the seed centre. Songs whose name equals
        a seed name are excluded.
    """
    metadata_cols = ['name', 'year', 'artists']
    song_dict = flatten_dict_list(song_list)

    # Scale the catalogue and the seed centre with the scaler fitted inside
    # the song clustering pipeline.
    song_center = get_mean_vector(song_list, spotify_data)
    scaler = song_cluster_pipeline.steps[0][1]
    scaled_data = scaler.transform(spotify_data[number_cols])
    scaled_song_center = scaler.transform(song_center.reshape(1, -1))
    distances = cdist(scaled_song_center, scaled_data, 'cosine')

    # Rank the whole catalogue, drop the seed songs, and only then keep the
    # first n_songs; truncating before filtering returned fewer than n_songs
    # whenever a seed was among the nearest neighbours.
    ranked_positions = np.argsort(distances[0])
    ranked_names = spotify_data['name'].iloc[ranked_positions]
    is_seed = ranked_names.isin(song_dict['name']).to_numpy()
    top_positions = ranked_positions[~is_seed][:n_songs]

    rec_songs = spotify_data.iloc[top_positions]
    return rec_songs[metadata_cols].to_dict(orient='records')

In [None]:
# Example: recommend songs for five seed tracks, using `data` as the catalogue.
recommend_songs([{'name': 'Come As You Are', 'year':1991},
                {'name': 'Smells Like Teen Spirit', 'year': 1991},
                {'name': 'Lithium', 'year': 1992},
                {'name': 'All Apologies', 'year': 1993},
                {'name': 'Stay Away', 'year': 1993}],  data)

In [None]:
# Maps a landscape keyword to song titles used as seeds for recommendations.
keyword_associations = {
    "Forest": ["If I Were King Of The Forest - Extended Version", "Into The Forest Of Wild Beasts"],  # Add relevant songs
    "Glacier": ["Glaciers of Ice (feat. Ghostface Killah & Masta Killa)", "Past Ice and Ice and Even More"],  # Add relevant songs
    "Desert": ["Desert Places", "Night On The Desert"],  # Add relevant songs
    "Coast": ["Pacific Coast Highway", "West Coast"]  # Add relevant songs
}

def recommend_songs_by_keyword(keyword, spotify_data, n_songs=10):
    """Recommend songs for a keyword via its associated seed songs.

    Args:
        keyword: Key looked up in ``keyword_associations``.
        spotify_data: DataFrame with 'name' and 'year' columns; it is used
            both to resolve the seed songs and as the recommendation catalogue.
        n_songs: Forwarded to ``recommend_songs``.

    Returns:
        The result of ``recommend_songs`` for the seeds found in
        ``spotify_data``, or an explanatory string when the keyword is unknown
        or none of its seed songs are present.
    """
    if keyword not in keyword_associations:
        return "Keyword not found in associations."

    song_list = []
    for song_name in keyword_associations[keyword]:
        # Resolve the seed in the frame that was passed in; the lookup used to
        # read the notebook-level `data` and ignored the parameter.
        years = spotify_data.loc[spotify_data['name'] == song_name, 'year']
        if years.empty:
            print(f"Warning: Song '{song_name}' not found in data DataFrame.")
            continue
        song_list.append({'name': song_name, 'year': years.values[0]})

    # Proceed with recommendations only if at least one seed was found.
    if not song_list:
        return "No associated songs found in data DataFrame."
    return recommend_songs(song_list, spotify_data, n_songs)

# Example usage: the class label predicted for the test image by the image
# model is used as the keyword, with `data` as the song catalogue.
recommendations = recommend_songs_by_keyword(predicted_label, data)
print(recommendations)