In [1]:
from pathlib import Path
from nbformat import v4 as nbf

# Start from an empty v4 notebook; cells are collected in display order and
# attached to the notebook object just before it is written out.
notebook = nbf.new_notebook()
cells = []

# Title cell (markdown)
title_md = (
    "# 🦄 Indian Unicorn Startup Analysis\n"
    "This notebook performs EDA (Exploratory Data Analysis) on the dataset "
    "containing information about Indian Unicorns as of June 2023."
)
cells.append(nbf.new_markdown_cell(title_md))

# Library imports used by the generated notebook
imports_src = "\n".join([
    "import pandas as pd",
    "import matplotlib.pyplot as plt",
    "from collections import Counter",
    "",
    "plt.style.use('ggplot')",
])
cells.append(nbf.new_code_cell(imports_src))

# Load the raw CSV into `df` (path is relative to the generated notebook)
load_src = "df = pd.read_csv('../data/unicorn_data.csv')"
cells.append(nbf.new_code_cell(load_src))

# Clean Columns
# The generated cell normalises the CSV headers to lower snake_case and drops
# the "($B)" unit marker. The marker is matched with an escaped pattern: the
# earlier unescaped '($)' was a regex group around the end-of-string anchor,
# so it matched only the empty string and removed nothing (a header such as
# 'Valuation ($B)' would have stayed 'valuation_($b)', which the later cells
# that read df['valuation'] cannot find).
cells.append(nbf.new_code_cell("\n".join([
    "df.columns = (",
    "    df.columns.str.strip().str.lower()",
    r"    .str.replace(r'[\s_]*\(\$b?\)', '', regex=True)  # drop unit markers such as ' ($B)'",
    "    .str.strip().str.replace(' ', '_', regex=False)",
    ")",
    # With the unit marker gone only the footnote carets remain on this header.
    "df.rename(columns={'entry_valuation^^': 'entry_valuation'}, inplace=True)",
])))

# Clean Currency Columns
# The generated cell converts both valuation columns to float and derives the
# entry year. The string clean-up is guarded by a dtype check because the
# `.str` accessor raises when pandas has already parsed a column as numeric,
# and `regex=False` is spelled out so '$' is always a literal character
# regardless of the pandas version's default for `regex`.
cells.append(nbf.new_code_cell("\n".join([
    "for col in ['entry_valuation', 'valuation']:",
    "    if not pd.api.types.is_numeric_dtype(df[col]):",
    "        # Strip the currency symbol and the 'B' (billions) suffix as literal text.",
    "        df[col] = df[col].str.replace('$', '', regex=False).str.replace('B', '', regex=False)",
    "    df[col] = df[col].astype(float)",
    "df['entry_year'] = pd.to_datetime(df['entry'], errors='coerce').dt.year",
])))

# Sections 1-7: each entry pairs a markdown heading with the source of the
# code cell that follows it. They are appended to `cells` in listed order.
summary_sections = [
    # 1. Total Unicorns
    (
        "## 📊 Total Unicorns",
        "print('Total unicorns:', df.shape[0])",
    ),
    # 2. Top Sectors
    (
        "## 🔝 Top Sectors by Unicorn Count",
        "df['sector'].value_counts().head(10).plot(kind='bar', title='Top 10 Sectors by Unicorn Count', color='skyblue');",
    ),
    # 3. Entry Trend
    (
        "## 📈 Unicorn Entry Trend (Year-wise)",
        "df['entry_year'].value_counts().sort_index().plot(kind='line', marker='o', title='Unicorns by Entry Year', color='green');",
    ),
    # 4. Top Cities
    (
        "## 🌆 Top Cities by Unicorn HQ",
        "df['location'].value_counts().head(10).plot(kind='barh', title='Top Cities by Unicorn HQ', color='coral');",
    ),
    # 5. Valuation Distribution
    (
        "## 💰 Distribution of Valuations",
        "df['valuation'].plot(kind='hist', bins=20, title='Distribution of Current Valuations', color='purple');",
    ),
    # 6. Sector-wise Valuation
    (
        "## 💼 Sector-wise Total Valuation",
        "df.groupby('sector')['valuation'].sum().sort_values(ascending=False).head(10).plot(kind='bar', title='Top Sectors by Total Valuation ($B)', color='darkorange');",
    ),
    # 7. Highest Valued Startups
    (
        "## 🏆 Top 10 Highest Valued Startups",
        "df[['company', 'valuation']].sort_values(by='valuation', ascending=False).head(10)",
    ),
]

for heading, source in summary_sections:
    cells.append(nbf.new_markdown_cell(heading))
    cells.append(nbf.new_code_cell(source))

# 8. Growth Rate: percentage change from entry valuation to current valuation,
# followed by the ten largest values.
growth_src = "\n".join([
    "df['growth_rate'] = ((df['valuation'] - df['entry_valuation']) / df['entry_valuation']) * 100",
    "df[['company', 'growth_rate']].sort_values(by='growth_rate', ascending=False).head(10)",
])
cells.extend([
    nbf.new_markdown_cell("## 📈 Growth Rate Since Entry"),
    nbf.new_code_cell(growth_src),
])

# 9. Common Investors: split the investor lists on ', ', flatten, count, plot top 10.
investors_src = "\n".join([
    "investors = df['select_investors'].dropna().str.split(', ')",
    "flat_list = [inv for sublist in investors for inv in sublist]",
    "pd.Series(Counter(flat_list)).sort_values(ascending=False).head(10).plot(kind='bar', title='Top Investors', color='teal');",
])
cells.extend([
    nbf.new_markdown_cell("## 🤝 Most Common Investors"),
    nbf.new_code_cell(investors_src),
])

# 10. Entry vs Valuation: scatter plot of the two valuation columns.
scatter_src = "df[['entry_valuation', 'valuation']].plot.scatter(x='entry_valuation', y='valuation', title='Entry vs Current Valuation');"
cells.extend([
    nbf.new_markdown_cell("## 🔄 Entry vs Current Valuation"),
    nbf.new_code_cell(scatter_src),
])

# Save CSV: the generated notebook writes the cleaned frame next to the raw data.
save_src = "df.to_csv('../data/cleaned_data.csv', index=False)"
cells.extend([
    nbf.new_markdown_cell("## 💾 Save Cleaned Data"),
    nbf.new_code_cell(save_src),
])

# Attach the collected cells to the notebook object.
notebook['cells'] = cells

# Save notebook: serialise to notebook JSON text, then write it as UTF-8.
path = "../data/unicore-analysis.ipynb"
notebook_json = nbf.writes(notebook)
output_file = Path(path)
output_file.write_text(notebook_json, encoding='utf-8')
# Bare expression so the notebook front-end echoes the output location.
path


'../data/unicore-analysis.ipynb'