In [1]:
!pip install fastapi uvicorn pandas numpy scikit-learn scipy websockets

Collecting fastapi
  Downloading fastapi-0.115.8-py3-none-any.whl.metadata (27 kB)
Collecting uvicorn
  Downloading uvicorn-0.34.0-py3-none-any.whl.metadata (6.5 kB)
Collecting websockets
  Downloading websockets-15.0-cp311-cp311-win_amd64.whl.metadata (7.0 kB)
Collecting starlette<0.46.0,>=0.40.0 (from fastapi)
  Downloading starlette-0.45.3-py3-none-any.whl.metadata (6.3 kB)
Collecting h11>=0.8 (from uvicorn)
  Downloading h11-0.14.0-py3-none-any.whl.metadata (8.2 kB)
Downloading fastapi-0.115.8-py3-none-any.whl (94 kB)
   ---------------------------------------- 0.0/94.8 kB ? eta -:--:--
   ---------------------------------------- 94.8/94.8 kB 2.6 MB/s eta 0:00:00
Downloading uvicorn-0.34.0-py3-none-any.whl (62 kB)
   ---------------------------------------- 0.0/62.3 kB ? eta -:--:--
   ---------------------------------------- 62.3/62.3 kB ? eta 0:00:00
Downloading websockets-15.0-cp311-cp311-win_amd64.whl (176 kB)
   ---------------------------------------- 0.0/176.1 kB ? eta -:--:

In [4]:
pip install python-multipart

Collecting python-multipart
  Downloading python_multipart-0.0.20-py3-none-any.whl.metadata (1.8 kB)
Downloading python_multipart-0.0.20-py3-none-any.whl (24 kB)
Installing collected packages: python-multipart
Successfully installed python-multipart-0.0.20
Note: you may need to restart the kernel to use updated packages.


In [5]:
from fastapi import FastAPI, UploadFile, File, Form
import pandas as pd
import numpy as np
import uvicorn
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import IsolationForest
from sklearn.linear_model import LinearRegression
from scipy import stats
import json
import asyncio
from fastapi.websockets import WebSocket

app = FastAPI()

# Function to detect outliers using Isolation Forest
def detect_outliers(df: pd.DataFrame, contamination=0.05):
    scaler = StandardScaler()
    df_scaled = scaler.fit_transform(df.select_dtypes(include=[np.number]))
    clf = IsolationForest(contamination=contamination, random_state=42)
    outliers = clf.fit_predict(df_scaled)
    df['Outlier'] = outliers
    return df

# Function for linear regression
def perform_regression(df: pd.DataFrame, x_col: str, y_col: str):
    X = df[[x_col]].values
    y = df[y_col].values
    model = LinearRegression()
    model.fit(X, y)
    predictions = model.predict(X)
    df['Predicted'] = predictions
    return df.to_json(orient='records')

# Function for hypothesis testing
def hypothesis_test(df: pd.DataFrame, col1: str, col2: str):
    t_stat, p_value = stats.ttest_ind(df[col1], df[col2])
    return {"t_stat": t_stat, "p_value": p_value}

# Function for probability distributions
def calculate_distribution(df: pd.DataFrame, col: str, dist_type: str):
    data = df[col].dropna()
    if dist_type == "normal":
        mean, std = np.mean(data), np.std(data)
        return {"mean": mean, "std": std}
    elif dist_type == "binomial":
        n, p = len(data), np.mean(data)/max(data)
        return {"n": n, "p": p}
    elif dist_type == "poisson":
        lam = np.mean(data)
        return {"lambda": lam}
    elif dist_type == "geometric":
        p = 1 / (np.mean(data) + 1)
        return {"p": p}
    return {"error": "Unsupported distribution"}

# Websocket for real-time collaboration
@app.websocket("/ws")
async def websocket_endpoint(websocket: WebSocket):
    await websocket.accept()
    while True:
        data = await websocket.receive_text()
        await websocket.send_text(f"Received: {data}")

# Endpoint to upload dataset and detect outliers
@app.post("/upload")
async def upload_file(file: UploadFile = File(...), contamination: float = Form(0.05)):
    df = pd.read_csv(file.file)
    df_with_outliers = detect_outliers(df, contamination)
    return df_with_outliers.to_json(orient='records')

# Endpoint for descriptive statistics
@app.post("/describe")
async def describe_data(file: UploadFile = File(...)):
    df = pd.read_csv(file.file)
    desc_stats = df.describe().to_json()
    return json.loads(desc_stats)

# Endpoint for regression analysis
@app.post("/regression")
async def regression_analysis(file: UploadFile = File(...), x_col: str = Form(...), y_col: str = Form(...)):
    df = pd.read_csv(file.file)
    return perform_regression(df, x_col, y_col)

# Endpoint for hypothesis testing
@app.post("/hypothesis_test")
async def run_hypothesis_test(file: UploadFile = File(...), col1: str = Form(...), col2: str = Form(...)):
    df = pd.read_csv(file.file)
    return hypothesis_test(df, col1, col2)

# Endpoint for probability distributions
@app.post("/distribution")
async def distribution_analysis(file: UploadFile = File(...), col: str = Form(...), dist_type: str = Form(...)):
    df = pd.read_csv(file.file)
    return calculate_distribution(df, col, dist_type)

# Endpoint for correlation matrix
@app.post("/correlation")
async def correlation_analysis(file: UploadFile = File(...)):
    df = pd.read_csv(file.file)
    correlation_matrix = df.corr().to_json()
    return json.loads(correlation_matrix)

# Endpoint for ANOVA test
@app.post("/anova")
async def anova_test(file: UploadFile = File(...), cols: str = Form(...)):
    df = pd.read_csv(file.file)
    groups = [df[col].dropna() for col in cols.split(',')]
    f_stat, p_value = stats.f_oneway(*groups)
    return {"f_stat": f_stat, "p_value": p_value}

if __name__ == "__main__":
    uvicorn.run(app, host="0.0.0.0", port=8000)

RuntimeError: asyncio.run() cannot be called from a running event loop