<a href="https://colab.research.google.com/github/MarciaFG/scimobility/blob/main/transformation_index_for_2007_2022.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# **Transformative Flows Project (2007-2022)**
**Author:** Marcia R. Ferreira (Complexity Science Hub Vienna & TU Wien)
- **Inputs:** 

1.   CWTS SQL Server [dimensions_2022jun]:


      *   Exported File:
      *   Exported File:


2.   CWTS Publication-level classification system: Meso-fields level [dimensions_2022jun_classification]
3.   Dimension reduction-based clustering: Laplacian matrix contructed from meso-field level topic matrix and second eigenvector of the matrix
4.   Dimensions database on BigQuery


- **Outputs:**

### Initialization and drivers

In [1]:
gpu_info = !nvidia-smi
gpu_info = '\n'.join(gpu_info)
if gpu_info.find('failed') >= 0:
  print('Select the Runtime → "Change runtime type" menu to enable a GPU accelerator, ')
  print('and then re-execute this cell.')
else:
  print(gpu_info)

from psutil import virtual_memory
ram_gb = virtual_memory().total / 1e9
print('Your runtime has {:.1f} gigabytes of available RAM\n'.format(ram_gb))

if ram_gb < 20:
  print('To enable a high-RAM runtime, select the Runtime → "Change runtime type"')
  print('menu, and then select High-RAM in the Runtime shape dropdown. Then, ')
  print('re-execute this cell.')
else:
  print('You are using a high-RAM runtime!')

Fri Apr 14 07:30:08 2023       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 525.85.12    Driver Version: 525.85.12    CUDA Version: 12.0     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  NVIDIA A100-SXM...  Off  | 00000000:00:04.0 Off |                    0 |
| N/A   32C    P0    46W / 400W |      0MiB / 40960MiB |      0%      Default |
|                               |                      |             Disabled |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Proces

In [2]:
# memory footprint support libraries/code
!ln -sf /opt/bin/nvidia-smi /usr/bin/nvidia-smi
!pip install gputil
#!pip install psutil
#!pip install humanize
#!pip install pynput
#pip install plotly==5.4.0
!pip install patool

# main libraries
import psutil
import humanize
import os
import numpy as np
import pandas as pd
from tqdm import tqdm
import requests
import torch
import nltk
import GPUtil as GPU

# plotting
import plotly.graph_objs as go
import plotly.io as pio
import matplotlib.pyplot as plt
plt.style.use('ggplot')
%matplotlib inline

# Google big query
from google.cloud import bigquery
from google.colab import files
%load_ext google.colab.data_table
%load_ext google.cloud.bigquery

# mount drive
from google.colab import drive
drive.mount('/content/drive')

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting gputil
  Downloading GPUtil-1.4.0.tar.gz (5.5 kB)
  Preparing metadata (setup.py) ... [?25l[?25hdone
Building wheels for collected packages: gputil
  Building wheel for gputil (setup.py) ... [?25l[?25hdone
  Created wheel for gputil: filename=GPUtil-1.4.0-py3-none-any.whl size=7408 sha256=9f74f49ad969c98a9d89fbdb856dfd2317947f65d6e57a95c0a313e2ad626803
  Stored in directory: /root/.cache/pip/wheels/2b/b5/24/fbb56595c286984f7315ee31821d6121e1b9828436021a88b3
Successfully built gputil
Installing collected packages: gputil
Successfully installed gputil-1.4.0
Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting patool
  Downloading patool-1.12-py2.py3-none-any.whl (77 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m77.5/77.5 kB[0m [31m4.5 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packa

In [9]:
# Provide your credentials to the runtime
#from google.colab import auth
#auth.authenticate_user()
#print('Authenticated')

# declare your project 
#project_id = "cshdimensionstest"

### Data imports

In [None]:
""" NOT RUN
from pydrive.auth import GoogleAuth
from pydrive.drive import GoogleDrive
from google.colab import auth
from oauth2client.client import GoogleCredentials

# Authenticate and create the PyDrive client.
auth.authenticate_user()
gauth = GoogleAuth()
gauth.credentials = GoogleCredentials.get_application_default()
drive = GoogleDrive(gauth)

id = '1wCFzWEAwBqH47qGQG1_-G6wPgrrs03A6'
print(id) # Verify that you have everything

downloaded = drive.CreateFile({'id':id}) 
downloaded.GetContentFile('second_eigenvector_clustering.csv')  
clusters = pd.read_csv('second_eigenvector_clustering.csv', sep=",", index_col=0) # Dataset is now stored in a Pandas Dataframe

print(clusters.head(10))
print("The data types are as follows:\n", clusters.dtypes)
print("The type of object is:\n", type(clusters))
"""

In [None]:
""" NOT RUN
# unzip the files exported from SQL Server
#!unzip "/content/drive/My Drive/TRANSFORMATION/data_export.zip"
#!unzip "/content/drive/My Drive/TRANSFORMATION/data_export.zip" > /dev/null
"""

In [None]:
!ls "/content/drive/My Drive"

In [3]:
import patoolib
import tempfile
import os

# Path of the zip file in Google Drive
zip_path = "/content/drive/My Drive/TRANSFORMATION/data_export.zip"

# Name of the CSV file(s) inside the zip
csv_file_names = [  "spectral_meso_clusters.csv"
                  , "for_division_labels.csv"
                  , "grid_ranks.csv"
                  , "trajectories_au_fourfive_skill.csv"
                  , "trajectories_au_morethanfive_skill.csv"
                  , "trajectories_au_single_skill.csv"
                  , "trajectories_au_twothree_skill.csv"]

# Separator character to use in the CSV files
separator = ";"

# Extract the zip file to a temporary directory
with tempfile.TemporaryDirectory() as tmpdir:
    patoolib.extract_archive(zip_path, outdir=tmpdir)
    
    # Load each CSV file into its own dataframe
    dfs = []
    for csv_file_name in csv_file_names:
        csv_file_path = os.path.join(tmpdir, csv_file_name)
        try:
            df = pd.read_csv(csv_file_path, sep=separator, encoding='utf-8', header= None, decimal=".")
            dfs.append(df)
        except pd.errors.ParserError:
            print(f"Error loading {csv_file_name}: Skipping...")

################################################################################

# Print the first few rows of each dataframe
for i, df in enumerate(dfs):
    print(f"Dataframe {i}:")
    print(df.head(2))
print("###########################################")

################################################################################

# extract the datasets and store them into a pandas dataframe
spectral_meso_clusters = dfs[0]
for_division_labels = dfs[1]
grid_ranks = dfs[2]
trajectories_au_fourfive_skill = dfs[3]
trajectories_au_morethanfive_skill = dfs[4]
trajectories_au_single_skill = dfs[5]
trajectories_au_twothree_skill = dfs[6]

################################################################################

print(type(for_division_labels))
print("###########################################")

patool: Extracting /content/drive/My Drive/TRANSFORMATION/data_export.zip ...
patool: running /usr/bin/7z x -o/tmp/tmpes_gl7og -- "/content/drive/My Drive/TRANSFORMATION/data_export.zip"
patool: ... /content/drive/My Drive/TRANSFORMATION/data_export.zip extracted to `/tmp/tmpes_gl7og'.
Dataframe 0:
          0                   1                 2         3         4   \
0  row_index  second_eigenvector  original_indices  cluster2  cluster3   
1          0  -0,657980785697483               128         0         0   

         5         6          7            8       9   \
0  cluster4  cluster5  cluster10  cluster_id2  n_pubs   
1         0         0          0          128   99353   

                                                  10  \
0                                             labels   
1  inhaler - dry powder inhaler - inhaler devices...   

                                                  11  
0                                            sources  
1  International Journal o

## Preprocessing

In [4]:
# Use the first row as the header
spectral_meso_clusters.columns = spectral_meso_clusters.iloc[0]
for_division_labels.columns = for_division_labels.iloc[0]
grid_ranks.columns = grid_ranks.iloc[0]

# Remove the first row (which is now the header)
spectral_meso_clusters = spectral_meso_clusters[1:]
for_division_labels = for_division_labels[1:]
grid_ranks = grid_ranks[1:]

print(spectral_meso_clusters.head())
print(for_division_labels.head())
print(grid_ranks.head())
print("###########################################")

def convert_to_float(val):
    if isinstance(val, str) and val.replace('.', '', 1).isdigit():
        return float(val.replace(',', '.'))
    return val

# Apply the function to all elements of the dataframe
grid_ranks = grid_ranks.applymap(convert_to_float)
spectral_meso_clusters = spectral_meso_clusters.applymap(convert_to_float)

print(grid_ranks.dtypes)
print(spectral_meso_clusters.dtypes)
print("###########################################")


from pandas.core.dtypes.dtypes import dtypes
from numpy.core.multiarray import dtype

headers = ['researcher_id', 'grid_id', 'start', 'end', 'Lenght', 'for_division_id', 'meso_field', 'spectral_cluster_id', 'concatenated_fields', 'year', 'n_pubs']

# set the new column names using the list
trajectories_au_morethanfive_skill.columns = headers
trajectories_au_fourfive_skill.columns = headers
trajectories_au_single_skill.columns = headers
trajectories_au_twothree_skill.columns = headers

# print the updated column names
print(trajectories_au_morethanfive_skill.columns)
print(trajectories_au_morethanfive_skill.dtypes)
print("###########################################")

print(trajectories_au_morethanfive_skill.describe())

0 row_index   second_eigenvector original_indices cluster2 cluster3 cluster4  \
1         0   -0,657980785697483              128        0        0        0   
2         9  -0,0796790037139393              109        4        3        2   
3         6  -0,0866583191228655              146        3        2        1   
4         7  -0,0835526247765448              120        3        2        1   
5         8  -0,0832388670665863              247        4        2        2   

0 cluster5 cluster10 cluster_id2  n_pubs  \
1        0         0         128   99353   
2        1         0         109  106502   
3        1         0         146   91599   
4        1         0         120  102555   
5        1         0         247   65569   

0                                             labels  \
1  inhaler - dry powder inhaler - inhaler devices...   
2  CRT response - CRT device - CRT implantation -...   
3  chiral selector - electrochromatography - plat...   
4  household air pollution - c

In [None]:
def calculate_org_sequence(df):
    # select the desired columns and drop duplicates
    df = df[['researcher_id', 'grid_id', 'for_division_id', 'start', 'end']].drop_duplicates().reset_index(drop=True)
    
    # concatenate start, end, and for_division_id columns
    df['concatenated'] = df['start'].astype(str) + '_' + df['end'].astype(str) + '_' + df['for_division_id'].astype(str)
    
    # calculate the org_sequence using rank() method that considers concatenated column
    df['org_sequence'] = df.groupby('researcher_id')['concatenated'].rank(method='dense')
    
    # drop the concatenated column
    df = df.drop('concatenated', axis=1)
    
    # return the researcher_id, grid_id, for_division_id, and org_sequence columns
    return df[['researcher_id', 'grid_id', 'for_division_id', 'org_sequence']]

################################################################################

def process_dataframe(df, org_seq_df):
    # merge the dataframes on researcher_id and grid_id
    merged_df = pd.merge(df, org_seq_df, on=['researcher_id', 'grid_id'], how='left')
    merged_df = merged_df.loc[:, ~merged_df.columns.str.endswith('_y')]
    merged_df = merged_df.rename(columns=lambda x: x[:-2] if x.endswith('_x') else x)

    # concatenate two columns
    merged_df['concatenated_2'] = merged_df['for_division_id'].astype(str) + ' - ' + merged_df['spectral_cluster_id'].astype(str)

    # select and aggregate columns
    selected_cols = ['researcher_id', 'grid_id', 'for_division_id', 'concatenated_2', 'org_sequence', 'n_pubs']
    selected_df = merged_df[selected_cols].drop_duplicates().reset_index(drop=True)
    aggregated_df = selected_df.groupby(['researcher_id', 'grid_id','for_division_id', 'concatenated_2','org_sequence']).sum().reset_index()

    return aggregated_df

################################################################################

sq_1_skill_df = process_dataframe(trajectories_au_single_skill, calculate_org_sequence(trajectories_au_single_skill))
sq_2_3_skill_df = process_dataframe(trajectories_au_twothree_skill, calculate_org_sequence(trajectories_au_twothree_skill))
sq_4_5_skill_df = process_dataframe(trajectories_au_fourfive_skill, calculate_org_sequence(trajectories_au_fourfive_skill))
sq_5_or_more_skill_df = process_dataframe(trajectories_au_morethanfive_skill, calculate_org_sequence(trajectories_au_morethanfive_skill))

################################################################################

print(sq_1_skill_df.head())
print(len(sq_1_skill_df))
print(len(trajectories_au_single_skill))

**Looks good!**

In [56]:
df_test = sq_2_3_skill_df[sq_2_3_skill_df["researcher_id"] == 'ur.010000002457.43']

## Cosine similarity scores

In [None]:
# attempt 1

from sklearn.metrics.pairwise import cosine_similarity

def calculate_cosine_similarity(df):
    df = df.sort_values(['researcher_id', 'org_sequence'])

    unique_researchers = df['researcher_id'].drop_duplicates()

    results = pd.DataFrame(columns=['researcher_id', 'prev_org_sequence', 'next_org_sequence', 'prev_grid_id', 'next_grid_id', 'prev_for_division_id','next_for_division_id', 'cosine_similarity'])

    for researcher_id in unique_researchers:
        researcher_data = df[df['researcher_id'] == researcher_id]
        max_org_sequence = researcher_data['org_sequence'].max()

        for org_sequence in range(1, int(max_org_sequence)):
            prev_data = researcher_data[researcher_data['org_sequence'] == org_sequence]
            next_data = researcher_data[researcher_data['org_sequence'] == org_sequence + 1]

            if not prev_data.empty and not next_data.empty:
                pivot_prev = prev_data.pivot_table(values='n_pubs', index='org_sequence', columns='concatenated_2', aggfunc=np.sum, fill_value=0)
                pivot_next = next_data.pivot_table(values='n_pubs', index='org_sequence', columns='concatenated_2', aggfunc=np.sum, fill_value=0)

                merged_pivot = pd.concat([pivot_prev, pivot_next], axis=0).fillna(0)

                prev_row = merged_pivot.iloc[[0]]
                next_row = merged_pivot.iloc[[1]]

                cosine_sim = cosine_similarity(prev_row, next_row)[0][0]

                result_row = {
                    'researcher_id': researcher_id, 
                    'prev_org_sequence': org_sequence, 
                    'next_org_sequence': org_sequence + 1,
                    'prev_grid_id': prev_data['grid_id'].values[0], 
                    'next_grid_id': next_data['grid_id'].values[0], 
                    'prev_for_division_id': prev_data['for_division_id'].values[0], 
                    'next_for_division_id': next_data['for_division_id'].values[0],
                    'cosine_similarity': cosine_sim
                }
                results = results.append(result_row, ignore_index=True)

    return results

test = calculate_cosine_similarity(df_test)
print(test)




In [None]:
# attempt 2

from sklearn.metrics.pairwise import cosine_similarity

def calculate_cosine_similarity(df):
    df = df.sort_values(['researcher_id', 'org_sequence'])

    unique_researchers = df['researcher_id'].drop_duplicates()

    results = pd.DataFrame(columns=['researcher_id', 'prev_org_sequence', 'next_org_sequence', 'prev_grid_id', 'next_grid_id', 'prev_for_division_id','next_for_division_id', 'cosine_similarity'])

    for researcher_id in unique_researchers:
        researcher_data = df[df['researcher_id'] == researcher_id]
        max_org_sequence = researcher_data['org_sequence'].max()
        unique_for_division_ids = researcher_data['for_division_id'].drop_duplicates()

        for for_division_id in unique_for_division_ids:
            division_data = researcher_data[researcher_data['for_division_id'] == for_division_id]

            for org_sequence in range(1, int(max_org_sequence)):
                prev_data = division_data[division_data['org_sequence'] == org_sequence]
                next_data = division_data[division_data['org_sequence'] == org_sequence + 1]

                if not prev_data.empty and not next_data.empty:
                    pivot_prev = prev_data.pivot_table(values='n_pubs', index='org_sequence', columns='concatenated_2', aggfunc=np.sum, fill_value=0)
                    pivot_next = next_data.pivot_table(values='n_pubs', index='org_sequence', columns='concatenated_2', aggfunc=np.sum, fill_value=0)

                    merged_pivot = pd.concat([pivot_prev, pivot_next], axis=0).fillna(0)

                    prev_row = merged_pivot.iloc[[0]]
                    next_row = merged_pivot.iloc[[1]]

                    cosine_sim = cosine_similarity(prev_row, next_row)[0][0]

                    result_row = {
                        'researcher_id': researcher_id, 
                        'prev_org_sequence': org_sequence, 
                        'next_org_sequence': org_sequence + 1,
                        'prev_grid_id': prev_data['grid_id'].values[0], 
                        'next_grid_id': next_data['grid_id'].values[0], 
                        'prev_for_division_id': prev_data['for_division_id'].values[0], 
                        'next_for_division_id': next_data['for_division_id'].values[0],
                        'cosine_similarity': cosine_sim
                    }
                    results = results.append(result_row, ignore_index=True)

    return results
    
test = calculate_cosine_similarity(df_test)
#print(test)


In [None]:
# attempt 3

from sklearn.metrics.pairwise import cosine_similarity

def calculate_cosine_similarity(df):
    df = df.sort_values(['researcher_id', 'org_sequence'])

    unique_researchers = df['researcher_id'].drop_duplicates()

    results = pd.DataFrame(columns=['researcher_id', 'prev_org_sequence', 'next_org_sequence', 'prev_grid_id', 'next_grid_id', 'prev_for_division_id','next_for_division_id', 'cosine_similarity'])

    for researcher_id in unique_researchers:
        researcher_data = df[df['researcher_id'] == researcher_id]
        max_org_sequence = researcher_data['org_sequence'].max()

        for org_sequence in range(1, int(max_org_sequence)):
            prev_data = researcher_data[researcher_data['org_sequence'] == org_sequence]
            next_data = researcher_data[researcher_data['org_sequence'] == org_sequence + 1]

            if not prev_data.empty and not next_data.empty:
                unique_for_division_ids = prev_data['for_division_id'].drop_duplicates()

                for for_division_id in unique_for_division_ids:
                    filtered_prev_data = prev_data[prev_data['for_division_id'] == for_division_id]
                    filtered_next_data = next_data[next_data['for_division_id'] == for_division_id]

                    if not filtered_prev_data.empty and not filtered_next_data.empty:
                        pivot_prev = filtered_prev_data.pivot_table(values='n_pubs', index='org_sequence', columns='concatenated_2', aggfunc=np.sum, fill_value=0)
                        pivot_next = filtered_next_data.pivot_table(values='n_pubs', index='org_sequence', columns='concatenated_2', aggfunc=np.sum, fill_value=0)

                        merged_pivot = pd.concat([pivot_prev, pivot_next], axis=0).fillna(0)

                        prev_row = merged_pivot.iloc[[0]]
                        next_row = merged_pivot.iloc[[1]]

                        cosine_sim = cosine_similarity(prev_row, next_row)[0][0]

                        result_row = {
                            'researcher_id': researcher_id, 
                            'prev_org_sequence': org_sequence, 
                            'next_org_sequence': org_sequence + 1,
                            'prev_grid_id': filtered_prev_data['grid_id'].values[0], 
                            'next_grid_id': filtered_next_data['grid_id'].values[0], 
                            'prev_for_division_id': for_division_id, 
                            'next_for_division_id': for_division_id,
                            'cosine_similarity': cosine_sim
                        }
                        results = results.append(result_row, ignore_index=True)

    return results

test = calculate_cosine_similarity(df_test)
print(test)


In [78]:
# attempt 4
# attempt 5
def get_cosine_similarity(researcher_df, prev_org_seq, next_org_seq, for_division_id):
    prev_df = researcher_df[(researcher_df['org_sequence'] == prev_org_seq) & (researcher_df['for_division_id'] == for_division_id)]
    next_df = researcher_df[(researcher_df['org_sequence'] == next_org_seq) & (researcher_df['for_division_id'] == for_division_id)]

    label_to_index = {label: idx for idx, label in enumerate(sorted(researcher_df['concatenated_2'].unique()))}
    
    prev_vector = np.zeros(len(label_to_index))
    next_vector = np.zeros(len(label_to_index))

    for _, row in prev_df.iterrows():
        code = row['concatenated_2']
        prev_vector[label_to_index[code]] = row['n_pubs']

    for _, row in next_df.iterrows():
        code = row['concatenated_2']
        next_vector[label_to_index[code]] = row['n_pubs']
    
    print("prev_vector:", prev_vector)
    print("next_vector:", next_vector)

    cosine_similarity = np.dot(prev_vector, next_vector) / (np.linalg.norm(prev_vector) * np.linalg.norm(next_vector))
    print("cosine_similarity:", cosine_similarity)
    return cosine_similarity

results = []

for researcher_id in df_test['researcher_id'].unique():
    researcher_df = df_test[df_test['researcher_id'] == researcher_id]
    org_sequences = sorted(researcher_df['org_sequence'].unique())

    for idx in range(len(org_sequences) - 1):
        prev_org_seq = org_sequences[idx]
        next_org_seq = org_sequences[idx + 1]

        for for_division_id in researcher_df['for_division_id'].unique():
            prev_df = researcher_df[(researcher_df['org_sequence'] == prev_org_seq) & (researcher_df['for_division_id'] == for_division_id)]
            next_df = researcher_df[(researcher_df['org_sequence'] == next_org_seq) & (researcher_df['for_division_id'] == for_division_id)]

            if not prev_df.empty and not next_df.empty:
                prev_grid_id = prev_df['grid_id'].iloc[0]
                next_grid_id = next_df['grid_id'].iloc[0]

                cosine_similarity = get_cosine_similarity(researcher_df, prev_org_seq, next_org_seq, for_division_id)

                results.append({
                    'researcher_id': researcher_id,
                    'prev_org_sequence': prev_org_seq,
                    'next_org_sequence': next_org_seq,
                    'prev_grid_id': prev_grid_id,
                    'next_grid_id': next_grid_id,
                    'prev_for_division_id': for_division_id,
                    'next_for_division_id': for_division_id,
                    'cosine_similarity': cosine_similarity
                })

output_table = pd.DataFrame(results)

prev_vector: [0. 0. 1. 0. 7. 0. 0. 0.]
prev_vector: [0. 0. 0. 0. 0. 0. 1. 0.]
prev_vector: [ 0.  0.  0.  0. 17.  0.  0.  0.]
prev_vector: [0. 0. 0. 0. 0. 0. 1. 0.]
prev_vector: [0. 0. 0. 1. 4. 0. 0. 0.]
prev_vector: [0. 0. 0. 0. 1. 0. 0. 0.]


In [77]:
output_table.head(100)
print(prev_vector)

NameError: ignored

In [50]:
sq_2_3_skill_df[sq_2_3_skill_df["researcher_id"] == 'ur.010000002457.43']

Unnamed: 0,researcher_id,grid_id,for_division_id,concatenated_2,org_sequence,n_pubs
17,ur.010000002457.43,grid.410726.6,4,4 - 85,4.0,1
18,ur.010000002457.43,grid.458451.9,4,4 - 2,1.0,1
19,ur.010000002457.43,grid.458451.9,4,4 - 85,1.0,7
20,ur.010000002457.43,grid.458451.9,6,6 - 85,1.0,1
21,ur.010000002457.43,grid.458451.9,7,7 - 85,1.0,1
22,ur.010000002457.43,grid.496923.3,3,3 - 85,3.0,1
23,ur.010000002457.43,grid.496923.3,4,4 - 5,3.0,1
24,ur.010000002457.43,grid.496923.3,4,4 - 85,3.0,9
25,ur.010000002457.43,grid.496923.3,5,5 - 85,3.0,1
26,ur.010000002457.43,grid.496923.3,6,6 - 85,3.0,1


In [51]:
cosine_similarity = dot_product(v1, v2) / (magnitude(v1) * magnitude(v2))

dot_product(v1, v2) = (1 * 0) + (7 * 13) + (0 * 1) = 0 + 91 + 0 = 91
magnitude(v1) = sqrt(1^2 + 7^2 + 0^2) = sqrt(1 + 49 + 0) = sqrt(50)
magnitude(v2) = sqrt(0^2 + 13^2 + 1^2) = sqrt



SyntaxError: ignored