# In [13]:
import pandas as pd
import numpy as np
import plotly.graph_objects as go
from sklearn.manifold import MDS
from scipy.cluster.hierarchy import linkage, dendrogram
from sklearn import manifold
from sklearn.preprocessing import LabelEncoder
from sklearn import preprocessing
from sklearn.metrics import pairwise_distances

# Path to the Excel workbook that is loaded below.
DATA_PATH = r"C:\Users\student\Downloads\Visual Analytics\P3\VA_Project2_data.xlsx"

# Columns shown in the hover tooltip, in display order. Several headers carry a
# leading space, so the names must be matched exactly.
HOVER_COLUMNS = ['File Id', 'Name', ' Date', ' Month', 'Year', ' Location', ' Country']


def build_hover_text(frame):
    """Return one HTML hover label per row of *frame*.

    Args:
        frame: DataFrame containing every column listed in ``HOVER_COLUMNS``.
            Pass the human-readable (un-encoded) values so the tooltip shows
            real names rather than label-encoder integers.

    Returns:
        A list of strings, one per row, with fields separated by ``<br>``.

    Raises:
        KeyError: if any of the ``HOVER_COLUMNS`` is missing from *frame*.
    """
    return [
        f"File ID: {file_id}<br>"
        f"Name: {name}<br>"
        f"Date: {date}<br>"
        f"Month: {month}<br>"
        f"Year: {year}<br>"
        f"Location: {location}<br>"
        f"Country: {country}"
        for file_id, name, date, month, year, location, country
        in zip(*(frame[column] for column in HOVER_COLUMNS))
    ]


def category_codes(values):
    """Return an integer code per element, numbered by order of first appearance.

    Equivalent to looking each element up in ``values.unique()``, but done in a
    single pass instead of one scan of the unique array per row.

    Args:
        values: 1-D sequence or Series of hashable values.

    Returns:
        A list of ints, the same length as *values*.
    """
    codes, _ = pd.factorize(values)
    return codes.tolist()


# Jupyter executes cells with __name__ == "__main__", so the pipeline below
# still runs there and its variables stay global; the guard only keeps it from
# running when this file is imported to reuse the helpers above.
if __name__ == "__main__":
    # read in xls data
    data = pd.read_excel(DATA_PATH)

    # Snapshot the original values before encoding overwrites them in place;
    # the hover text is built from this copy so it stays human-readable.
    display_data = data.copy()

    # label encode categorical features
    # NOTE(review): fillna runs after this step, so it cannot protect the
    # encoder from missing values in these columns -- confirm they have none.
    le = LabelEncoder()
    for column in ['Intelligence Agency', 'File Id', 'Crime', 'Name',
                   ' Location', ' Country', ' Month', 'Organization']:
        data[column] = le.fit_transform(data[column])

    # fill missing values with 0 (applied to the display copy as well, so the
    # tooltip shows the same placeholder the model sees)
    data.fillna(0, inplace=True)
    display_data.fillna(0, inplace=True)

    # select features to use
    feature_names = ['Intelligence Agency', 'File Id', ' Date', ' Country',
                     ' Month', 'Year', 'Name', 'Organization']
    X = data[feature_names].values

    # standardize features
    scaled_data = preprocessing.scale(X)

    # compute pairwise dissimilarities
    dissimilarities = pairwise_distances(scaled_data, metric='euclidean')

    # compute MDS; a fixed seed makes the layout reproducible between runs
    mds = MDS(dissimilarity='precomputed', random_state=0)
    X_mds = mds.fit_transform(dissimilarities)

    # create hover text with file IDs, names, dates, months, years, locations,
    # and countries (from the un-encoded copy)
    hover_text = build_hover_text(display_data)

    # create categorical color scale based on country
    countries = data[' Country'].unique()
    color_scale = category_codes(data[' Country'])

    # create scatter plot with hover text and categorical color scale
    fig = go.Figure(
        data=go.Scatter(
            x=X_mds[:, 0],
            y=X_mds[:, 1],
            mode='markers',
            text=hover_text,
            hoverinfo='text',
            marker=dict(
                color=color_scale,
                colorscale='Viridis',
            ),
        )
    )

    fig.update_layout(title='Interactive Scatter Plot of MDS Results')
    fig.show()




