<a href="https://colab.research.google.com/github/Garvit-562/Machine_Learning/blob/main/Pandas_operations.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import pandas as pd

In [None]:
# Load the unicorn-companies table from a CSV in the working directory.
df = pd.read_csv('Unicorn_Companies.csv')

# Evaluated but not rendered: a notebook cell only displays its last expression.
df.head()

# Per-column count of missing values (this is the cell's displayed output).
df.isna().sum()

Unnamed: 0,0
Company,0
Valuation,0
Date Joined,0
Industry,0
City,16
Country,0
Continent,0
Year Founded,0
Funding,0
Select Investors,1


In [None]:
# Clean 'Valuation': drop the '$' prefix and the 'B' suffix so the remaining
# text parses as a float, stored under a name that records the unit.
df['Valuation ($B)'] = (
    df['Valuation']
    .str.replace('$', '', regex=False)
    .str.replace('B', '', regex=False)
    .astype(float)
)

# Clean 'Funding': strip '$' and the 'B'/'M' unit letters, then parse.
# errors='coerce' turns anything non-numeric (e.g. 'Unknown') into NaN, so no
# separate replace step or scratch column on df is needed.
funding_number = pd.to_numeric(
    df['Funding']
    .str.replace('$', '', regex=False)
    .str.replace('B', '', regex=False)
    .str.replace('M', '', regex=False),
    errors='coerce',
)

# Rows whose original text carried an 'M' are in millions; divide those by
# 1000 so the whole column is expressed in billions.
is_million = df['Funding'].str.contains('M', na=False)
df['Funding ($B)'] = funding_number.where(~is_million, funding_number / 1000)

# Keep an independent snapshot of the frame at this stage. A plain
# `without_str_df = df` only binds a second name to the same object, so any
# column a later cell adds to df would silently appear in the snapshot too.
without_str_df = df.copy()

# Missing-value count per column after cleaning (the cell's displayed output).
df.isnull().sum()

Unnamed: 0,0
Company,0
Valuation,0
Date Joined,0
Industry,0
City,16
Country,0
Continent,0
Year Founded,0
Funding,0
Select Investors,1


In [None]:
# Parse 'Date Joined' once and reuse the parsed Series for every derived column.
date_joined = pd.to_datetime(df['Date Joined'])
df['Date Joined'] = date_joined

# Calendar features derived from the join date.
df['Join Year'] = date_joined.dt.year
df['Join Month'] = date_joined.dt.month_name()
df['Join DayName'] = date_joined.dt.day_name()

# Show the first ten rows of the date-related columns next to the company name.
preview_columns = ['Company', 'Date Joined', 'Join Year', 'Join Month', 'Join DayName']
print(df[preview_columns].head(10))


        Company Date Joined  Join Year Join Month Join DayName
0     Bytedance  2017-04-07       2017      April       Friday
1        SpaceX  2012-12-01       2012   December     Saturday
2         SHEIN  2018-07-03       2018       July      Tuesday
3        Stripe  2014-01-23       2014    January     Thursday
4        Klarna  2011-12-12       2011   December       Monday
5         Canva  2018-01-08       2018    January       Monday
6  Checkout.com  2019-05-02       2019        May     Thursday
7     Instacart  2014-12-30       2014   December      Tuesday
8     JUUL Labs  2017-12-20       2017   December    Wednesday
9    Databricks  2019-02-05       2019   February      Tuesday


In [None]:
# Per-country summary. Named aggregation labels each output column where it is
# computed, so no positional renaming of the result is needed afterwards.
grouped = (
    df.groupby('Country')
    .agg(**{
        'Unicorn Count': ('Company', 'count'),              # number of unicorns
        'Total Valuation ($B)': ('Valuation ($B)', 'sum'),  # total valuation in billions
        'Avg Funding ($B)': ('Funding ($B)', 'mean'),       # average funding in billions
        'Total Funding ($B)': ('Funding ($B)', 'sum'),      # total funding in billions
    })
    .reset_index()
    # Largest total valuation first
    .sort_values(by='Total Valuation ($B)', ascending=False)
)

# Top ten countries by total valuation, followed by India's row on its own.
print(grouped.head(10))
print(grouped.loc[grouped['Country'].eq('India')])

           Country  Unicorn Count  Total Valuation ($B)  Avg Funding ($B)  \
44   United States            562                1933.0          0.506577   
9            China            173                 696.0          0.661400   
19           India             65                 196.0          0.733277   
43  United Kingdom             43                 195.0          0.701419   
17         Germany             26                  72.0          0.630538   
38          Sweden              6                  63.0          1.624167   
1        Australia              8                  56.0          0.338500   
16          France             24                  55.0          0.563174   
7           Canada             19                  49.0          0.345778   
36     South Korea             12                  41.0          0.444417   

    Total Funding ($B)  
44             281.150  
9              112.438  
19              47.663  
43              30.161  
17              16.394  
38

In [None]:
# Rows whose 'Industry' mentions "fintech" in any letter case; missing
# industries are treated as non-matches.
fintech_unicorns = df.loc[df['Industry'].str.contains('fintech', case=False, na=False)]

# The subset of those rows located in India.
fintech_india = fintech_unicorns.loc[fintech_unicorns['Country'].eq('India')]

# Overall count plus a five-row sample.
print(f"Total Fintech Unicorns: {len(fintech_unicorns)}")
print(fintech_unicorns[['Company', 'Industry']].head())

# India-only count plus the full list of matching companies.
print("In INDIA----------------")
print(f"Total fintech unicorns :{len(fintech_india)}")
print(fintech_india[['Company', 'Industry', 'Country']])


Total Fintech Unicorns: 224
         Company Industry
3         Stripe  Fintech
4         Klarna  Fintech
6   Checkout.com  Fintech
10       Revolut  Fintech
12           FTX  Fintech
In INDIA----------------
Total fintech unicorns :15
                               Company Industry Country
74                            Razorpay  Fintech   India
84    National Stock Exchange of India  Fintech   India
87                                CRED  Fintech   India
162                          Pine Labs  Fintech   India
244                             Upstox  Fintech   India
260                    Digit Insurance  Fintech   India
312                           BharatPe  Fintech   India
339                              Groww  Fintech   India
372                       PolicyBazaar  Fintech   India
522                   CoinSwitch Kuber  Fintech   India
609                         CredAvenue  Fintech   India
630                           MobiKwik  Fintech   India
808             Acko General Insuran

In [None]:
# Ask for a country name; strip stray whitespace so ' India ' still matches.
Cname = input("Enter country's name: ").strip()

# Case-insensitive match against the 'Country' column.
filtered = df[df['Country'].str.lower() == Cname.lower()]

if filtered.empty:
    # Say so explicitly; otherwise the only hint of a typo is an all-zero row.
    print(f"No unicorns found for country '{Cname}'")

# One-row summary for the selected country. The row is built directly rather
# than via filtered.agg({...}).to_frame().T, because that route stores all
# three results in one float Series and the count then prints as e.g. 65.0.
grouped = pd.DataFrame([{
    # Prefer the dataset's own spelling; fall back to the typed name if no match.
    'Country': Cname.title() if filtered.empty else filtered['Country'].iloc[0],
    'Unicorn Count': int(filtered['Company'].count()),
    'Total Valuation ($B)': filtered['Valuation ($B)'].sum(),
    'Avg Funding ($B)': filtered['Funding ($B)'].mean(),
}])

# Show result
print(grouped)


Enter country's name: India
  Country  Unicorn Count  Total Valuation ($B)  Avg Funding ($B)
0   India           65.0                 196.0          0.733277


Unnamed: 0,Company,Valuation,Date Joined,Industry,City,Country,Continent,Year Founded,Funding,Select Investors
0,Bytedance,$180B,2017-04-07,Artificial intelligence,Beijing,China,Asia,2012,$8B,"Sequoia Capital China, SIG Asia Investments, S..."
1,SpaceX,$100B,2012-12-01,Other,Hawthorne,United States,North America,2002,$7B,"Founders Fund, Draper Fisher Jurvetson, Rothen..."
2,SHEIN,$100B,2018-07-03,E-commerce & direct-to-consumer,Shenzhen,China,Asia,2008,$2B,"Tiger Global Management, Sequoia Capital China..."
3,Stripe,$95B,2014-01-23,Fintech,San Francisco,United States,North America,2010,$2B,"Khosla Ventures, LowercaseCapital, capitalG"
4,Klarna,$46B,2011-12-12,Fintech,Stockholm,Sweden,Europe,2005,$4B,"Institutional Venture Partners, Sequoia Capita..."
