In [14]:
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

# Load the data
file_path = 'new_special_schools.csv'
schools_data = pd.read_csv(file_path)

# Fill NaN values in both 'ABOUT' and 'Name' columns
schools_data['ABOUT'] = schools_data['ABOUT'].fillna('')
schools_data['Name'] = schools_data['Name'].fillna('')

# Combine 'Name' and 'ABOUT' into a single column for vectorization
schools_data['combined'] = schools_data['Name'] + ' ' + schools_data['ABOUT']

# Initialize TF-IDF Vectorizer
tfidf_vectorizer = TfidfVectorizer(stop_words='english')
tfidf_matrix = tfidf_vectorizer.fit_transform(schools_data['combined'])

def recommend_schools(query, city=None):
    # Transform the query using the same vectorizer
    query_vector = tfidf_vectorizer.transform([query])
    
    # Calculate cosine similarity
    similarities = cosine_similarity(query_vector, tfidf_matrix).flatten()
    
    # Create a DataFrame to hold the results
    results_df = schools_data.copy()
    results_df['similarity'] = similarities
    
    # Filter results by city if specified
    if city:
        results_df = results_df[results_df['City/Town'].str.contains(city, case=False, na=False)]
    
    # Sort the results by similarity
    results_df = results_df.sort_values(by='similarity', ascending=False)
    
    # Return the top 5 recommendations
    return results_df[['Name', 'City/Town', 'ABOUT', 'similarity']].head(5)

# Example usage



In [11]:
query =input("Disability: ")
city=input("City: ")

Disability:  blind
City:  Thika 


In [15]:
recommendation_results = recommend_schools(query=query, city=city)
print(recommendation_results)

Empty DataFrame
Columns: [Name, City/Town, ABOUT, similarity]
Index: []
