In [6]:
import numpy as np
import csv
import chardet
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.metrics.pairwise import cosine_similarity

# Calculate the cosine similarity between two texts
def calculate_cosine_similarity(text1, text2, threshold=0.6):
    """Return the bag-of-words cosine similarity of two texts, gated by a threshold.

    Both texts are turned into term-count vectors with ``CountVectorizer``
    and compared with ``cosine_similarity``.

    Args:
        text1: First text, or ``None``.
        text2: Second text, or ``None``.
        threshold: Minimum score that counts as a match.

    Returns:
        The similarity score when it is at least ``threshold``; otherwise
        ``0.0``. ``0.0`` is also returned when either text is ``None`` or
        when the texts cannot be vectorized.
    """
    if text1 is None or text2 is None:
        return 0.0

    try:
        term_counts = CountVectorizer(min_df=1).fit_transform([text1, text2])
    except ValueError:
        # CountVectorizer raises ValueError when it ends up with an empty
        # vocabulary (neither text produced a token, e.g. both are blank).
        # Nothing can be compared, so report "no similarity" rather than
        # letting a single blank entry abort the caller's whole search.
        return 0.0

    vectors = term_counts.toarray()
    score = cosine_similarity(vectors[0].reshape(1, -1), vectors[1].reshape(1, -1))[0][0]

    return score if score >= threshold else 0.0

def read_csv_data(file_path, column_name):
    """Load every row of a CSV file as a dict keyed by the header names.

    Args:
        file_path: Path of the CSV file; its first line is used as the header.
        column_name: Column that must be present when the file has data rows.

    Returns:
        A list with one dict per data row, in file order. Each dict holds the
        complete row, not just ``column_name``.

    Raises:
        KeyError: If the file has data rows but no ``column_name`` column.
    """
    # 'utf-8-sig' drops a leading byte-order mark (which would otherwise be
    # glued to the first header name) and reads BOM-less UTF-8 unchanged.
    # newline='' is what the csv module asks for so that line breaks inside
    # quoted fields are parsed correctly.
    with open(file_path, 'r', encoding='utf-8-sig', newline='') as file:
        csv_reader = csv.DictReader(file)
        rows = list(csv_reader)
        fieldnames = csv_reader.fieldnames or []

    # Fail on a missing column only when there is at least one data row.
    if rows and column_name not in fieldnames:
        raise KeyError(column_name)
    return rows

def get_row_by_index(csv_data, index):
    """Return the row at ``index``, or ``None`` when the index is out of range.

    Args:
        csv_data: Sequence of rows.
        index: Position of the wanted row. Negative values count from the
            end, as with ordinary sequence indexing.

    Returns:
        ``csv_data[index]`` when the index is valid for the sequence,
        otherwise ``None``.
    """
    # Check both bounds: an index below -len(csv_data) would otherwise raise
    # IndexError instead of yielding None like an index that is too large.
    if -len(csv_data) <= index < len(csv_data):
        return csv_data[index]
    return None

def get_columns_by_index(row, column_indices):
    """Pick the requested entries out of a row.

    Args:
        row: Anything subscriptable by the given keys (a dict row keyed by
            column name, or a sequence indexed by position), or ``None``.
        column_indices: Keys or positions to look up, in output order.

    Returns:
        A list with ``row[key]`` for each requested key, or ``None`` when
        ``row`` is ``None``.
    """
    # A missing row yields None instead of a TypeError, so callers can test
    # the result with ``is not None``.
    if row is None:
        return None
    return [row[column_index] for column_index in column_indices]

# Ask the user for the search text
user_input = input("Search: ")

# Retrieve the extraction data from the CSV file (one dict per row)
csv_data = read_csv_data('Hasil_Ekstraksi_Clean.csv', 'Keywords')

# Score every row's keywords against the user input; scores below the
# similarity function's threshold come back as 0.0
similarities = [
    calculate_cosine_similarity(row['Keywords'], user_input)
    for row in csv_data
]

columns_to_display = ['Name']  # Column names to display

# Defaults describe "nothing found"; they are overwritten below on a match.
max_similarity = 0.0
max_index = None
most_similar_row = None
selected_columns = None

# np.max / np.argmax raise ValueError on an empty list, so only rank when
# the CSV actually contained rows.
if similarities:
    max_similarity = np.max(similarities)
    max_index = np.argmax(similarities)
    # A best score of 0.0 means no row reached the threshold; argmax would
    # then just point at the first row, which is not a real match.
    if max_similarity > 0.0:
        most_similar_row = get_row_by_index(csv_data, max_index)

if most_similar_row is not None:
    selected_columns = get_columns_by_index(most_similar_row, columns_to_display)

# Display the row with the largest cosine similarity, if there is one
if selected_columns is not None:
    print("Cosine similarity terbesar:", max_similarity)
    print("Baris yang terkait:")
    for column_name, value in zip(columns_to_display, selected_columns):
        print(f"{column_name}: {value}")
else:
    print("Tidak ada hasil yang cocok.")


Search: milk


TypeError: string indices must be integers