In [1]:
import pandas as pd 
from datetime import datetime, timedelta

In [2]:
# Load the two input CSVs from the working directory:
#   - 'raises (2).csv'        -> raises_df
#   - 'Chainlink_updated.csv' -> chainlink_df
raises_df = pd.read_csv(filepath_or_buffer='raises (2).csv')
chainlink_df = pd.read_csv(filepath_or_buffer='Chainlink_updated.csv')

In [3]:
# Pull the name column of each frame out as a plain Python list:
# 'Name' from the raises data, 'Project Name' from the Chainlink data.
chainlink_names = chainlink_df.loc[:, 'Project Name'].tolist()
raises_names = raises_df.loc[:, 'Name'].tolist()

In [4]:
# Split raises_df by whether its 'Name' appears (exact match) in chainlink_names.
is_chainlink_project = raises_df['Name'].isin(chainlink_names)

# Rows whose name is in the Chainlink list are set aside ...
excluded_projects_df = raises_df[is_chainlink_project]
# ... and the remaining rows are kept as an independent copy so later
# column assignments do not touch raises_df.
included_projects_df = raises_df[~is_chainlink_project].copy()

In [6]:
# 5. Parse the 'Date' column of included_projects_df; unparseable entries become NaT.
#    Plain column assignment is used instead of `.loc[:, 'Date'] = ...` so that the
#    column is replaced and takes the datetime64 dtype. In pandas >= 2.0 the `.loc`
#    form writes into the existing column in place, which can leave it as object dtype.
included_projects_df['Date'] = pd.to_datetime(included_projects_df['Date'], errors='coerce')

# 6. Drop the rows whose 'Date' could not be parsed (NaT).
included_projects_df = included_projects_df.dropna(subset=['Date'])

# 7. Compute the cutoff date: midnight today minus six calendar months.
#    DateOffset(months=6) replaces the former 6*30-day approximation, and normalize()
#    removes the time of day so re-running within the same day gives the same cutoff.
cutoff_date = pd.Timestamp.today().normalize() - pd.DateOffset(months=6)

# 8. Keep only the rows dated strictly after the cutoff.
recent_projects_df = included_projects_df[included_projects_df['Date'] > cutoff_date]

In [10]:
# Show the first 20 rows so they can be cross-checked by hand against the
# Chainlink ecosystem website.
recent_projects_df.iloc[:20]

Unnamed: 0,Name,Timestamp,Date,Amount Raised,Round,Description,Lead Investor,Category,Source,Valuation,Chains,Other Investors
0,Noble,1698105600,2023-10-24,3300000.0,Seed,Generic asset issuance chain purpose-built for...,Polychain,Infrastructure,https://x.com/noble_xyz/status/171675863432608...,,Cosmos,Wintermute Ventures + Circle Ventures + Hustle...
1,Blockaid,1698019200,2023-10-23,6000000.0,Seed,web3 security,,Smart contract security,https://x.com/TheBlock__/status/17164091493139...,,,Sequoia Capital + Greylock Partners + Cyberstarts
2,Blockaid,1698019200,2023-10-23,27000000.0,Series A,web3 security,Ribbit Capital + Variant,Smart contract security,https://x.com/TheBlock__/status/17164091493139...,,,Sequoia Capital + Greylock Partners + Cyberstarts
3,Upland,1697760000,2023-10-20,7000000.0,Series A+,metaverse game,EOS Network Ventures,Gaming,https://x.com/decryptmedia/status/171533747687...,,EOS,
4,Beluga,1697673600,2023-10-19,4000000.0,Seed,crypto platform for onboarding new users,Fin Capital,Web3,https://x.com/CoinDesk/status/1715049770652148118,,,Anagram + UDHC + Dispersion Capital + Aptos La...
7,Ryder,1697673600,2023-10-19,1200000.0,Seed,Hardware wallet,Oak Grove Ventures,Hardware,https://x.com/decryptmedia/status/171503523258...,,,Bitcoin Frontier Fund + Muneeb Ali + SBX Capital
8,Elixir Protocol,1697587200,2023-10-18,7500000.0,Series A,"decentralized, algorithmic market making protocol",Hack VC,DEX,https://x.com/TheBlock__/status/17146362114599...,100.0,,NGC Ventures + Angellist Ventures + Bloccelera...
9,Darewise,1697587200,2023-10-18,3500000.0,Token Pre-Sale,onchain gaming,,Gaming,https://x.com/animocabrands/status/17145614767...,,Bitcoin,Animoca Brands + GameFi Ventures
10,Fileverse,1697500800,2023-10-17,1500000.0,Pre-Seed,onchain space for collaboration,,Web3,https://x.com/fileverse/status/171428833157047...,,,Gnosis + F.actor + Mask Network + Safe + Water...
12,Myshell,1697414400,2023-10-16,5600000.0,Seed,web3-enabled AI platform,Ince Capital,Web3,https://x.com/TheBlock__/status/17139026393043...,5657.0,opBNB,Hashkey Capital + Folius Ventures + SevenX Ven...


In [11]:
# Manual check against the Chainlink ecosystem website: the result above is correct.

In [12]:
# List every distinct value (NaN included) found in the "Round" column
# of included_projects_df, in order of first appearance.
unique_round_values = pd.unique(included_projects_df["Round"])
print(unique_round_values)

['Seed' 'Series A' 'Series A+' 'Token Pre-Sale' 'Pre-Seed' 'Strategic '
 'Seed+' 'Private' 'pre-Series A' 'Angel Round' nan 'Equity + Token'
 'Seed and Strategic' 'Strategic Round' 'Venture Round' 'Series C'
 'Series B' 'Private Round' 'Series C+' 'Presale' 'Public token sale'
 'Private token sale' 'Private + Public' 'Pre-seed+' 'Institutional'
 'SAFT' 'IFO' 'Preseed' 'TwelveFold Auction' 'Series pre-A' 'IPO'
 'Series A-4' 'Series A2' 'Equity' 'Convertible note issuance'
 'Bridge Round' 'Strategic Private Sale' 'Private Equity'
 'Ecosystem Round' 'Ecosystem Foundation raise'
 'Public Offering Common Stock' 'Series A & B' 'DAO round' '3rd round'
 'Series D' 'Loan' 'Post-IPO Equity' 'Series F' 'pre-Series B' 'IEO' 'ICO'
 'Seed & Private' 'Grant' 'Series C-1' 'Series E' 'Post-IPO Debt'
 'Debt Financing' 'Corporate Round' 'Series B2' 'Secondary Market'
 'Series B1' 'Equity Crowdsale' 'Series B-1' 'Seed and Private Sale'
 'Private Placement' 'Funding Round' 'pre-IPO' 'Product Crowdfunding']

In [15]:
# Work on a copy so recent_projects_df itself is left untouched.
recent_projects_df_copy = recent_projects_df.copy()

# 1. Convert "Amount Raised" to a numeric type. Missing and unparseable values
#    both become NaN. Plain column assignment replaces the column, so it takes
#    the numeric dtype (a `.loc[:, col] = ...` write can keep an object dtype
#    in pandas >= 2.0).
recent_projects_df_copy["Amount Raised"] = pd.to_numeric(
    recent_projects_df_copy["Amount Raised"], errors="coerce"
)

# 2. Sort ascending with the missing amounts first. This puts them where the
#    former -1 placeholder put them, without a sentinel that could collide
#    with a real value or miss values that only became NaN during coercion.
sorted_recent_projects_df = recent_projects_df_copy.sort_values(
    by="Amount Raised", ascending=True, na_position="first"
)

# 3. Label the missing amounts "N/A". The column is cast to object first so
#    that writing a string is not an incompatible-dtype assignment into a
#    float column (deprecated in recent pandas releases).
missing_amount = sorted_recent_projects_df["Amount Raised"].isna()
sorted_recent_projects_df["Amount Raised"] = sorted_recent_projects_df["Amount Raised"].astype(object)
sorted_recent_projects_df.loc[missing_amount, "Amount Raised"] = "N/A"


In [26]:
# Render the projects ordered by "Amount Raised" (optional inspection step).
sorted_recent_projects_df

Unnamed: 0,Name,Timestamp,Date,Amount Raised,Round,Description,Lead Investor,Category,Source,Valuation,Chains,Other Investors
289,Aki Network,1682640000,2023-04-28,,Seed,,Emoote + Mask Network + Akatsuki,,https://twitter.com/aki_protocol/status/165187...,40,,MZ Web3fund + Incuba Alpha + Meteorite Labs + ...
55,OnRamp Money,1695081600,2023-09-19,,Strategic,Fiat-to-crypto onramp,Algorand Ventures,,https://x.com/onrampmoney/status/1704106022993...,,,
59,ProsperEx,1694908800,2023-09-17,,Strategic,"decentralized exchange, blending RWA and AI",Magnus Capital,DEX,https://x.com/prosper_ex/status/17032530045674...,,,
210,Decentralized Gaming Ventures,1686787200,2023-06-15,,Seed,Web3 game venture builder,Hashed,Gaming,https://twitter.com/techinasia/status/16692233...,,,
203,Animoca Brands,1687132800,2023-06-19,,Strategic,Animoca Brands focuses on driving digital prop...,Mitsu,Gaming,https://www.animocabrands.com/mitsui-forms-a-s...,,,
...,...,...,...,...,...,...,...,...,...,...,...,...
228,Magic Labs,1685491200,2023-05-31,52000000.0,Strategic,crypto wallet infrastructure,PayPal Ventures,Infrastructure,https://twitter.com/magic_labs/status/16639090...,,,Cherubic Ventures + Synchrony + Northzone + Vo...
149,Futureverse,1689638400,2023-07-18,54000000.0,Series A,AI and metaverse technologies platform,10T Holdings,Metaverse,https://twitter.com/CoinDesk/status/1681281258...,,,Ripple Labs
135,Flashbots,1690243200,2023-07-25,60000000.0,Series B,research and development company that focuses ...,Paradigm,MEV,https://twitter.com/CoinDesk/status/1683884630...,1000,Ethereum,
261,Auradine,1684195200,2023-05-16,81000000.0,Series A,Blockchain infrastructure solutions,Celesta Capital + Mayfield,Infrastructure,https://twitter.com/Auradine_Inc/status/165848...,,,Marathon Digital Holdings + Cota Capital + DCV...


In [23]:
# Distinct "Round" values (NaN included) among the recent, sorted projects.
unique_round_values = pd.unique(sorted_recent_projects_df["Round"])
print(unique_round_values)

['Seed' 'Strategic ' nan 'Private' 'pre-Series A' 'Pre-Seed' 'Angel Round'
 'Private token sale' 'Seed+' 'Private Round' 'Public token sale'
 'Strategic Round' 'Venture Round' 'Private + Public' 'Token Pre-Sale'
 'Presale' 'Seed and Strategic' 'Series A' 'Series A+' 'Pre-seed+'
 'Series C' 'Equity + Token' 'Series B' 'Series C+']


In [25]:
# "Round" labels whose rows are dropped (exact string match).
round_values_to_remove = [
    'pre-Series A',
    'Series A',
    'Series A+',
    'Series C',
    'Series B',
    'Series C+',
]

# Keep every row whose "Round" is not in the list above; rows with a missing
# "Round" do not match any entry and are therefore kept.
keep_row = ~sorted_recent_projects_df['Round'].isin(round_values_to_remove)
filtered_df = sorted_recent_projects_df.loc[keep_row]

# Render the result (optional inspection step).
filtered_df


Unnamed: 0,Name,Timestamp,Date,Amount Raised,Round,Description,Lead Investor,Category,Source,Valuation,Chains,Other Investors
289,Aki Network,1682640000,2023-04-28,,Seed,,Emoote + Mask Network + Akatsuki,,https://twitter.com/aki_protocol/status/165187...,40,,MZ Web3fund + Incuba Alpha + Meteorite Labs + ...
55,OnRamp Money,1695081600,2023-09-19,,Strategic,Fiat-to-crypto onramp,Algorand Ventures,,https://x.com/onrampmoney/status/1704106022993...,,,
59,ProsperEx,1694908800,2023-09-17,,Strategic,"decentralized exchange, blending RWA and AI",Magnus Capital,DEX,https://x.com/prosper_ex/status/17032530045674...,,,
210,Decentralized Gaming Ventures,1686787200,2023-06-15,,Seed,Web3 game venture builder,Hashed,Gaming,https://twitter.com/techinasia/status/16692233...,,,
203,Animoca Brands,1687132800,2023-06-19,,Strategic,Animoca Brands focuses on driving digital prop...,Mitsu,Gaming,https://www.animocabrands.com/mitsui-forms-a-s...,,,
...,...,...,...,...,...,...,...,...,...,...,...,...
255,Story Protocol,1684281600,2023-05-17,29300000.0,Seed,"platform offers a way to create, govern, and ...",a16z crypto,Metaverse,https://twitter.com/NFTgators/status/165881493...,,,Hashed + Samsung Next + Mirana Corp + Dao5 + T...
131,Hi,1690416000,2023-07-27,30000000.0,Strategic,Web3 Neo-banking,Animoca Brands,Banking,https://twitter.com/animocabrands/status/16845...,,,
145,Cosmic Wire,1689724800,2023-07-19,30000000.0,Seed,web3 infrastructure provider,Polygon Ventures + Solana Foundation,Infrastructure,https://twitter.com/CoinDesk/status/1681711434...,,,
46,Proof of Play,1695254400,2023-09-21,33000000.0,Seed,onchain game studio,a16z + Greenoaks Capital,Gaming,https://x.com/ProofOfPlay/status/1704876956762...,,,Naval Ravikant + Balaji Srinivasan + Justin Ka...


In [27]:
# Report how many rows remain after the "Round" filter.
num_rows = len(filtered_df)
print(f"The number of rows in the filtered DataFrame is: {num_rows}")

The number of rows in the filtered DataFrame is: 247


In [28]:
# Write filtered_df to 'filtered_projects.csv' in the working directory,
# leaving out the DataFrame index.
filtered_df.to_csv(path_or_buf='filtered_projects.csv', index=False)
