<a href="https://colab.research.google.com/github/Thribhuvan03/DSTlab/blob/main/11239M002_MINI_PROJECT.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import csv
from google.colab import files

# === UTILITY FUNCTIONS ===
def clean_text(text):
    """Return *text* normalized for comparison.

    Leading and trailing whitespace is removed, the text is lowercased,
    and every internal run of whitespace is collapsed to a single space.
    """
    words = text.strip().lower().split()
    return ' '.join(words)

def save_dataset(filename, dataset):
    """Write the project dataset to *filename* as UTF-8 CSV.

    The file starts with a ``title,genre`` header row, followed by one
    row per ``(title, genre)`` pair in *dataset*, in iteration order.
    An existing file at *filename* is overwritten.
    """
    with open(filename, 'w', newline='', encoding='utf-8') as out_file:
        csv_out = csv.writer(out_file)
        csv_out.writerow(['title', 'genre'])
        # Unpack each entry so anything that is not a 2-item pair still fails.
        csv_out.writerows([title, genre] for title, genre in dataset)

def write_csv_report(filename, student_projects, project_statuses):
    """Write the per-student report to *filename* as UTF-8 CSV.

    Columns are ``Student``, ``Project Title``, ``Genre`` and ``Status``.
    Each ``(title, genre)`` pair in *student_projects* is matched by
    position with the entry in *project_statuses*; students are labelled
    ``Student 1``, ``Student 2``, ... in that order. If the two inputs
    differ in length, the extra entries of the longer one are ignored.
    """
    columns = ['Student', 'Project Title', 'Genre', 'Status']
    with open(filename, 'w', newline='', encoding='utf-8') as report:
        out = csv.DictWriter(report, fieldnames=columns)
        out.writeheader()
        paired = zip(student_projects, project_statuses)
        for number, ((title, genre), status) in enumerate(paired, start=1):
            values = (f'Student {number}', title, genre, status)
            out.writerow(dict(zip(columns, values)))

# === BUILT-IN DATASET ===
# Seed (title, genre) pairs that student submissions are compared against.
project_dataset = [
    ('AI-based healthcare system', 'AI'),
    ('Blockchain voting platform', 'Blockchain'),
    ('Smart irrigation system using IoT', 'IoT'),
    ('Automated attendance system', 'Computer Vision'),
    ('E-commerce website with recommendations', 'Web'),
    ('Chatbot for customer service', 'NLP'),
    ('Fake news detection system', 'Machine Learning'),
    ('Face mask detection during pandemic', 'Computer Vision'),
    ('Autonomous delivery robot', 'Robotics'),
    ('Personal finance management app', 'Finance'),
]

# Normalized copies of the seed data: an ordered list (written back out
# when the updated dataset is saved) and a set with the same pairs that is
# used for membership tests.
cleaned_project_dataset = [
    (clean_text(seed_title), clean_text(seed_genre))
    for seed_title, seed_genre in project_dataset
]
cleaned_dataset = set(cleaned_project_dataset)

# === STEP 1: ENTER STUDENT PROJECTS ===
# Keep asking until the count parses as a non-negative integer, so a typo
# re-prompts instead of aborting the whole run with a ValueError.
while True:
    count_text = input("Enter the number of students: ")
    try:
        num_students = int(count_text)
    except ValueError:
        print("Please enter a whole number (for example, 3).")
        continue
    if num_students < 0:
        print("The number of students cannot be negative.")
        continue
    break

# Collect one (title, genre) pair per student, stored exactly as typed;
# normalization happens later when the entries are compared.
student_projects = []
for i in range(num_students):
    title = input(f"Enter project title of student {i + 1}: ")
    genre = input(f"Enter project genre of student {i + 1}: ")
    student_projects.append((title, genre))

# === STEP 2: CHECK FOR DUPLICATES ===
# A submission counts as a duplicate when its normalized (title, genre)
# pair is already known, either from the built-in dataset or from an
# earlier student in this same run (new projects are registered as they
# are seen). project_statuses ends up with one status string per student,
# in the same order as student_projects.
project_statuses = []

for idx, (title, genre) in enumerate(student_projects):
    cleaned_title = clean_text(title)
    cleaned_genre = clean_text(genre)
    project_key = (cleaned_title, cleaned_genre)
    if project_key in cleaned_dataset:
        status = "Duplicate project (already in dataset)"
        # The matching dataset entry has exactly this normalized title, so
        # it can be reported directly without rescanning the dataset list.
        print(f" → Student {idx + 1}'s project is a DUPLICATE of the dataset project: '{cleaned_title.title()}'")
    else:
        status = "New project (added to dataset)"
        print(f" → Student {idx + 1}'s project is NEW (not in dataset).")
        # Keep the ordered list and the lookup set in step with each other.
        cleaned_project_dataset.append(project_key)
        cleaned_dataset.add(project_key)
    project_statuses.append(status)

# === STEP 3: SAVE UPDATED FILES ===
updated_dataset_file = "updated_project_dataset.csv"
report_file = "student_project_report.csv"

# Title-case every stored (title, genre) pair before writing the dataset.
# NOTE(review): str.title() also re-cases acronyms and hyphenated words
# (e.g. "ai-based" -> "Ai-Based", "iot" -> "Iot") — confirm this is intended.
titled_rows = []
for stored_title, stored_genre in cleaned_project_dataset:
    titled_rows.append((stored_title.title(), stored_genre.title()))
save_dataset(updated_dataset_file, titled_rows)

# The report uses each student's title and genre exactly as entered.
write_csv_report(report_file, student_projects, project_statuses)

# === STEP 4: DOWNLOAD UPDATED FILES ===
print("\n✅ Processing complete! Download the updated files below:")

# Hand both generated files to Colab's download helper: dataset first,
# then the report.
for finished_file in (updated_dataset_file, report_file):
    files.download(finished_file)


Enter the number of students: 2
Enter project title of student 1: AI-based healthcare system
Enter project genre of student 1: AI
Enter project title of student 2: Cyber Threats
Enter project genre of student 2: Cyber
 → Student 1's project is a DUPLICATE of the dataset project: 'Ai-Based Healthcare System'
 → Student 2's project is NEW (not in dataset).

✅ Processing complete! Download the updated files below:


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>