In [None]:
# Card Details

import requests
from bs4 import BeautifulSoup
import csv

# URL of the webpage to scrape
url = 'https://wallethub.com/best-credit-cards'

# Browser-like User-Agent so the request is less likely to be blocked
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.88 Safari/537.36'
}

# Fetch the page. The timeout keeps a stalled connection from hanging the
# notebook, and raise_for_status() stops here on an HTTP error instead of
# silently producing an empty CSV from an error page.
response = requests.get(url, headers=headers, timeout=30)
response.raise_for_status()
soup = BeautifulSoup(response.content, 'html.parser')

# The field container appears with either a "size-5" or a "size-6" class;
# both variants are parsed the same way, in this order.
info_box_classes = (
    'card-info-box card-fld-cnt des size-5',
    'card-info-box card-fld-cnt des size-6',
)

# One dict per card: 'Card Name' plus every label/value pair found
credit_cards = []

for container in soup.find_all('section', class_='card-object'):
    # A card without a title link is skipped entirely
    name_tag = container.find('a', class_='link')
    if not name_tag:
        continue
    card_data = {'Card Name': name_tag.get_text(strip=True)}

    for box_class in info_box_classes:
        info_box = container.find('div', class_=box_class)
        if not info_box:
            continue
        for field in info_box.find_all('div', class_='card-fields des'):
            label_tag = field.find('div', class_='card-field-label jusaus')
            value_tag = field.find('div', class_='card-field-value')
            # Only keep fields that have both a label and a value
            if label_tag and value_tag:
                card_data[label_tag.get_text(strip=True)] = value_tag.get_text(strip=True)

    credit_cards.append(card_data)

# Define the exact fields we want to save
fieldnames = ['Card Name', 'Transfer Intro APR' ,'Regular APR', 'Annual Fee', 'Rewards Rate', 'Bonus Offer', 'Accepted Credit']

# Save the data to a CSV file.
# - encoding='utf-8' avoids platform-dependent encoding errors on non-ASCII text
# - restval='' fills fields a card does not have
# - extrasaction='ignore' drops scraped labels that are not in `fieldnames`
with open('credit_cards_info.csv', mode='w', newline='', encoding='utf-8') as file:
    writer = csv.DictWriter(file, fieldnames=fieldnames, restval='', extrasaction='ignore')
    writer.writeheader()
    writer.writerows(credit_cards)

print("Data has been saved to credit_cards_info.csv")


Data has been saved to credit_cards_info.csv


In [None]:
# Card Reviews

import requests
from bs4 import BeautifulSoup
import csv

# List of URLs for each credit card on WalletHub
# NOTE(review): three pages appear twice in this list
# (discover-it-secured-credit-card-2289c, wells-fargo-active-cash-card-3346c and
# american-express-blue-70c), so code that scrapes every entry fetches those
# pages twice. One entry also carries a "#reviews" fragment.
urls = [
    "https://wallethub.com/d/american-express-blue-cash-preferred-547c",
    "https://wallethub.com/d/chase-freedom-unlimited-2293c",
    "https://wallethub.com/d/delta-reserve-credit-card-1587c#reviews",
    "https://wallethub.com/d/chase-sapphire-reserve-credit-card-1842c",
    "https://wallethub.com/d/discover-it-secured-credit-card-2289c",
    "https://wallethub.com/d/citi-double-cash-card-121c",
    "https://wallethub.com/d/quicksilverone-rewards-75c",
    "https://wallethub.com/d/petal-cash-back-card-3249c",
    "https://wallethub.com/d/wells-fargo-active-cash-card-3346c",
    "https://wallethub.com/d/wells-fargo-reflect-card-3353c",
    "https://wallethub.com/d/citi-simplicity-567c",
    "https://wallethub.com/d/capital-one-venture-378c",
    "https://wallethub.com/d/discover-it-secured-credit-card-2289c",
    "https://wallethub.com/d/discover-it-credit-card-801c",
    "https://wallethub.com/d/wells-fargo-active-cash-card-3346c",
    "https://wallethub.com/d/capital-one-secured-credit-card-383c",
    "https://wallethub.com/d/american-express-gold-card-220c",
    "https://wallethub.com/d/chase-ink-preferred-credit-card-2312c",
    "https://wallethub.com/d/citi-premierpass-cardelite-level-125c",
    "https://wallethub.com/d/american-express-blue-70c",
    "https://wallethub.com/d/u-s-bank-altitude-go-visa-signature-card-3275c",
    "https://wallethub.com/d/pnc-cash-rewards-visa-credit-card-3100c",
    "https://wallethub.com/d/upgrade-triple-cash-3365c",
    "https://wallethub.com/d/synchrony-premier-world-mastercard-3350c",
    "https://wallethub.com/d/gold-delta-skymiles-214c",
    "https://wallethub.com/d/apple-credit-card-417c",
    "https://wallethub.com/d/marriott-bonvoy-boundless-3090c",
    "https://wallethub.com/d/american-express-blue-70c",

]

# Function to scrape reviews
def scrape_card_reviews(url, num_reviews=50, headers=None, timeout=30):
    """
    Scrape up to `num_reviews` review texts from one card page.

    Parameters:
    url (str): Address of the card page to fetch
    num_reviews (int): Maximum number of reviews to keep, in page order
    headers (dict | None): HTTP headers for the request; when None, a
        browser-like User-Agent is sent so the request is less likely to be blocked
    timeout (float): Seconds to wait for the server before giving up

    Returns:
    list[dict]: One {'card_name': ..., 'review': ...} dict per review. Empty
        when the page has no matching review elements or the server answers
        with an HTTP error status.

    Raises:
    requests.RequestException: On connection problems or a timeout
    """
    if headers is None:
        headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.88 Safari/537.36'
        }

    # Send a request to fetch the HTML content
    response = requests.get(url, headers=headers, timeout=timeout)

    # An error page is not a card page: report it and skip, rather than
    # parsing it and quietly returning nothing.
    if not response.ok:
        print(f"Skipping {url}: HTTP {response.status_code}")
        return []

    # Parse the HTML content
    soup = BeautifulSoup(response.text, 'html.parser')

    # Extract the card name using the <h2> tag with specific class
    card_name_tag = soup.find('h2', {'class': 'title extra-bold-font'})
    card_name = card_name_tag.text.strip() if card_name_tag else "Unknown Card"

    # Review bodies are the <div> elements marked itemprop="description"
    review_tags = soup.find_all('div', {'itemprop': 'description'})

    # Keep only the first `num_reviews` reviews
    return [
        {'card_name': card_name, 'review': tag.get_text().strip()}
        for tag in review_tags[:num_reviews]
    ]

# Scrape reviews for each URL.
# `urls` can list the same page more than once; dict.fromkeys() drops the
# repeats while keeping first-seen order, so each page's reviews are
# collected only once.
all_reviews = []
for url in dict.fromkeys(urls):
    reviews = scrape_card_reviews(url)
    all_reviews.extend(reviews)

# Write results to a CSV file (one row per review)
csv_file = 'credit_card_reviews_28.csv'
with open(csv_file, mode='w', newline='', encoding='utf-8') as file:
    writer = csv.DictWriter(file, fieldnames=['card_name', 'review'])
    writer.writeheader()
    writer.writerows(all_reviews)

print(f"Reviews have been saved to {csv_file}")


Reviews have been saved to credit_card_reviews_28.csv


In [None]:
## Simple Recommender System Based on Kaggle Dataset

import pandas as pd
import numpy as np
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

class CreditCardRecommender:
    """
    Content-based credit card recommender.

    Each card is described by the text of its 'Card Name', 'Category' and
    'Bank Name' columns. Cards are compared by the cosine similarity of the
    TF-IDF vectors built from that text.
    """

    def __init__(self, data):
        """
        Initialize the recommender with credit card data

        Parameters:
        data (pandas.DataFrame): DataFrame containing credit card information;
            must provide the 'Card Name', 'Category' and 'Bank Name' columns.
            The frame is copied, so the caller's object is not modified.
        """
        self.df = data.copy()

        # Preprocess the data
        self.preprocess_data()

        # Create feature matrix using TF-IDF
        self.create_feature_matrix()

    def preprocess_data(self):
        """
        Preprocess the dataset
        - Combine the relevant text columns into 'combined_features'
        - Remove duplicates
        - Renumber the rows so index labels equal row positions
        """
        # A missing value would turn the whole concatenation into NaN, which
        # TfidfVectorizer rejects, so blanks are treated as empty strings.
        name, category, bank = (
            self.df[column].fillna('').astype(str)
            for column in ('Card Name', 'Category', 'Bank Name')
        )
        self.df['combined_features'] = name + ' ' + category + ' ' + bank

        # Remove duplicates based on combined features. The index is reset
        # because rows of the TF-IDF matrix are addressed by position; gaps
        # left by dropped rows would make labels and positions disagree.
        self.df = (
            self.df
            .drop_duplicates(subset='combined_features')
            .reset_index(drop=True)
        )

    def create_feature_matrix(self):
        """
        Create a TF-IDF feature matrix for content-based recommendations

        Row i of `self.feature_matrix` corresponds to row i (by position) of
        `self.df`.
        """
        # Use TF-IDF Vectorizer to convert text to numerical features
        self.tfidf = TfidfVectorizer(stop_words='english')
        self.feature_matrix = self.tfidf.fit_transform(self.df['combined_features'])

    def get_recommendations(self, card_name, top_n=3):
        """
        Get recommendations based on a given credit card

        Parameters:
        card_name (str): Name of the credit card to find similar cards for
            (exact match against the 'Card Name' column)
        top_n (int): Number of recommendations to return

        Returns:
        str: Plain-text table ('Card Name', 'Bank Name', 'Category') of the
            most similar cards, most similar first. If the card is not in the
            dataset, a message is printed and an empty pandas.DataFrame is
            returned instead.
        """

        print("Recommendations for ",card_name, "are : ")
        # Locate the card by row POSITION (not index label), because the
        # feature matrix and .iloc below are both positional.
        matches = np.flatnonzero((self.df['Card Name'] == card_name).to_numpy())
        if matches.size == 0:
            print(f"Card '{card_name}' not found in the dataset.")
            return pd.DataFrame()
        card_pos = int(matches[0])

        # Calculate cosine similarity between the card and every card
        similarity_scores = cosine_similarity(
            self.feature_matrix[card_pos],
            self.feature_matrix
        ).flatten()

        # Rank from most to least similar, then drop the input card itself
        # explicitly instead of assuming it always ranks first (ties can
        # reorder it).
        ranked = similarity_scores.argsort()[::-1]
        similar_positions = ranked[ranked != card_pos][:top_n]

        return self.df.iloc[similar_positions][['Card Name', 'Bank Name', 'Category']].to_string(index=False)


    def recommend_by_category(self, category, top_n=3):
        """
        Recommend cards within a specific category

        Parameters:
        category (str): Category of credit cards to recommend (exact match
            against the 'Category' column)
        top_n (int): Number of recommendations to return

        Returns:
        str: Plain-text table ('Card Name', 'Bank Name', 'Category') of the
            first `top_n` cards in the category, in dataset order. If no card
            has that category, a message is printed and an empty
            pandas.DataFrame is returned instead.
        """
        print("Recommendations for ",category, "are : ")
        category_cards = self.df[self.df['Category'] == category]

        if len(category_cards) == 0:
            print(f"No cards found in category '{category}'")
            return pd.DataFrame()

        # head() already returns every row when fewer than top_n exist
        return category_cards[['Card Name', 'Bank Name', 'Category']].head(top_n).to_string(index=False)

# Load the card dataset from the Excel workbook
data = pd.read_excel("/content/All_Reviews.xlsx")

# Build the recommender; construction preprocesses the frame and fits TF-IDF
recommender = CreditCardRecommender(data=data)



In [11]:
# Request recommendations for the "Lifestyle" category (top_n left at its default of 3)
recommender.recommend_by_category("Lifestyle")

Recommendations for  Lifestyle are : 


Unnamed: 0,Card Name,Bank Name,Category
6,HDFC Platinum Edge,HDFC Bank,Lifestyle
89,HDFC Bank Titanium Times,HDFC Bank,Lifestyle
99,ICICI RUBYX MASTERCARD,ICICI,Lifestyle


In [12]:
# Request cards similar to "AXIS VISTARA" (top_n left at its default of 3)
recommender.get_recommendations("AXIS VISTARA")

Recommendations for  AXIS VISTARA are : 


Unnamed: 0,Card Name,Bank Name,Category
90,AXIS VISTARA SIGNATURE,Axis Bank,Travel
911,AXIS VISTARA INFINITE,Axis Bank,Travel
94,AXIS MILES AND MORE,Axis Bank,Travel
