<a href="https://colab.research.google.com/github/2303A51750/aim/blob/main/Labexam.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
"""Prediction of Startup Opportunities in Turkish Business
Q1. Identify the Top-10 Startups in Turkey
Q2. Find the Max and Min crowdsourced funding obtained by Turkey
Q3. Name the gender which has acquired the most funding for Startups
Q4. Identify the Top-5 cities with the most startups
Q5. Name the Top-5 technologies popular in Turkish startups.
Q6. Apply either Classification or Clustering or Regression Model and evaluate the accuracy, error metrics of the model."""

In [1]:
from collections import Counter

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.cluster import KMeans
from sklearn.metrics import silhouette_score
from sklearn.preprocessing import LabelEncoder, StandardScaler

In [None]:
# Path to the semicolon-separated CSV; change it if the file lives elsewhere
# when running locally.
file_path = 'turkishCF.csv'
data = pd.read_csv(file_path, sep=';')

In [None]:
# Show the leading rows of the loaded frame under a heading.
sample_rows = data.head()
print("Dataset Sample:", sample_rows, sep="\n")

In [None]:
# Q1: the ten projects with the highest 'toplanan_tutar' values, keeping only
# the project name and that amount.
top_10_startups = (
    data[['proje_adi', 'toplanan_tutar']]
    .nlargest(10, 'toplanan_tutar')
)
print("\nTop-10 Startups in Turkey:", top_10_startups, sep="\n")

In [None]:
# Q2: extremes of the 'toplanan_tutar' column.
raised_amounts = data['toplanan_tutar']
max_funding, min_funding = raised_amounts.max(), raised_amounts.min()

print("\nMax Crowdsourced Funding:", max_funding)
print("Min Crowdsourced Funding:", min_funding)

In [None]:
# Q3: sum 'toplanan_tutar' per value of 'proje_sahibi_cinsiyet' and pick the
# group with the largest total.
gender_funding = (
    data['toplanan_tutar']
    .groupby(data['proje_sahibi_cinsiyet'])
    .sum()
)
most_funded_gender = gender_funding.idxmax()
top_gender_total = gender_funding.loc[most_funded_gender]

print("\nGender Acquiring the Most Funding:")
print(f"{most_funded_gender} with {top_gender_total} total funding.")


In [None]:
# Q4: value_counts() sorts by frequency (descending), so the first five
# entries are the five most common 'konum' values.
city_counts = data['konum'].value_counts()
top_5_cities = city_counts.iloc[:5]
print("\nTop-5 Cities with Most Startups:", top_5_cities, sep="\n")


In [None]:
# Q5: the five most frequent values of the 'kategori' column.
category_counts = data['kategori'].value_counts()
top_5_technologies = category_counts.head(5)

print("\nTop-5 Technologies in Turkish Startups:")
print(top_5_technologies)

In [None]:
# Q6 (clustering input): the three columns used as features. Rows with a
# missing value in any of them are dropped; the surviving rows keep their
# original index labels.
numerical_columns = [
    'hedef_miktari',
    'toplanan_tutar',
    'destekci_sayisi',
]
cluster_data = data.dropna(subset=numerical_columns)[numerical_columns]


In [None]:
# Standardize each feature column so no single column dominates the
# Euclidean distances used by K-Means.
scaler = StandardScaler().fit(cluster_data)
scaled_data = scaler.transform(cluster_data)

In [None]:
# Fit K-Means with three clusters on the standardized features.
# n_init is pinned explicitly: scikit-learn changed its default (10 -> 'auto')
# and warned about it in between, so leaving it implicit makes the number of
# restarts, and therefore the resulting clusters, depend on the installed
# version. random_state fixes the centroid initialisation for reproducibility.
kmeans = KMeans(n_clusters=3, n_init=10, random_state=42)
# One cluster label per row of scaled_data, in the same row order.
clusters = kmeans.fit_predict(scaled_data)


In [None]:
# Attach the cluster labels to the full frame.
# Every row starts at -1; rows that were dropped from cluster_data (missing
# feature values) therefore keep -1 as a "not clustered" marker.
data['Cluster'] = -1
# cluster_data kept the original index labels, so they map each label in
# `clusters` back to the row it was computed from.
data.loc[cluster_data.index, 'Cluster'] = clusters

In [None]:
# Centroids of the fitted model, expressed in standardized feature space
# (one row per cluster, one column per entry of numerical_columns).
print("\nCluster Centers:")
print(kmeans.cluster_centers_)

# Q6 asks for error/quality metrics of the model. Clustering has no ground
# truth labels here, so report the two standard internal metrics instead.
# Inertia: sum of squared distances of samples to their closest centroid
# (lower means tighter clusters).
print("\nInertia (within-cluster sum of squares):", kmeans.inertia_)

# Silhouette score lies in [-1, 1]; higher means better-separated clusters.
# It is only defined when there are between 2 and n_samples - 1 distinct
# labels, so guard against degenerate fits instead of raising.
distinct_clusters = np.unique(clusters).size
if 1 < distinct_clusters < len(scaled_data):
    print("Silhouette Score:", silhouette_score(scaled_data, clusters))
else:
    print("Silhouette Score: undefined (needs 2..n_samples-1 distinct clusters)")

In [None]:
# Scatter of columns 0 and 1 of scaled_data (the first two entries of
# numerical_columns after standardization), coloured by cluster label.
fig, ax = plt.subplots(figsize=(8, 6))
points = ax.scatter(
    scaled_data[:, 0],
    scaled_data[:, 1],
    c=clusters,
    cmap='viridis',
    alpha=0.6,
)
ax.set_title("K-Means Clustering of Startups")
ax.set_xlabel("Scaled Feature 1")
ax.set_ylabel("Scaled Feature 2")
fig.colorbar(points, ax=ax, label="Cluster")
plt.show()