In [None]:
import json
import io
import pandas as pd
import streamlit as st

# Page-level settings (page title, wide layout) followed by the on-page heading.
# NOTE(review): Streamlit has historically required set_page_config to run
# before any other st.* call — keep it first; confirm against the version in use.
st.set_page_config(page_title="Flow-to-JSONL Converter", layout="wide")
st.title("CSV → LLM fine-tune JSONL")

CollectionsResponse(collections=[CollectionDescription(name='llm-sparse-and-dense'), CollectionDescription(name='llm2-sparse'), CollectionDescription(name='hm2-q3q4'), CollectionDescription(name='homework2'), CollectionDescription(name='llm-rag')])

In [None]:
# ── 1. Load data ──────────────────────────────────────────────────────────────
uploaded = st.file_uploader("Upload CICFlowMeter CSV", type=["csv"])
if uploaded is None:
    # Nothing to work with yet: end this script run until a file is provided.
    st.stop()

try:
    df = pd.read_csv(uploaded)
except (pd.errors.EmptyDataError, pd.errors.ParserError, UnicodeDecodeError) as exc:
    # An empty, malformed, or non-UTF-8 upload would otherwise surface as a raw
    # traceback; report it in the page and halt before later steps touch `df`.
    st.error(f"Could not read the uploaded CSV: {exc}")
    st.stop()

In [None]:
# ── 2. Column selector ────────────────────────────────────────────────────────
all_columns = list(df.columns)

# A header counts as a label column when it reads "label" once surrounding
# whitespace and letter case are ignored. The same list drives both the feature
# defaults and the label default so the two widgets can never disagree.
label_like = [c for c in all_columns if str(c).strip().lower() == "label"]
if "Label" in label_like:
    # An exact "Label" header wins when several candidates exist.
    default_label_index = all_columns.index("Label")
elif label_like:
    default_label_index = all_columns.index(label_like[0])
else:
    default_label_index = 0

st.subheader("Select feature columns for the prompt")
feature_cols = st.multiselect(
    "Columns",
    options=all_columns,
    default=[c for c in all_columns if c not in label_like],
)

label_col = st.selectbox(
    "Label column (completion)",
    options=all_columns,
    index=default_label_index,
)

# The completion column must not double as a prompt feature: it would leak the
# answer into the prompt and make `df[feature_cols + [label_col]]` carry the
# same column twice.
if label_col in feature_cols:
    st.warning(
        f"'{label_col}' is the label column, so it is left out of the prompt features."
    )
    feature_cols = [c for c in feature_cols if c != label_col]

In [None]:
# ── 3. Row-level preview / edit ───────────────────────────────────────────────
st.subheader("Edit rows if needed")

# The editor works on its own copy of the chosen columns (features first,
# label last), so the frame loaded from the CSV is left as it was read.
editor_columns = feature_cols + [label_col]
editor_source = df[editor_columns].copy()
edited_df = st.data_editor(
    editor_source,
    key="editor",
    num_rows="dynamic",
    use_container_width=True,
)

In [None]:
# ── 4. Convert on demand ──────────────────────────────────────────────────────
def row_to_prompt(row, cols):
    """Describe one row as a comma-separated list of "<column> is <value>" clauses.

    Args:
        row: Mapping-like object indexable by column name (``row[col]``).
        cols: Column names to include, in the order they should appear.

    Returns:
        A string such as "Destination Port is 80, Flow Duration is 5821928";
        an empty string when ``cols`` is empty.
    """
    clauses = []
    for name in cols:
        clauses.append(f"{name} is {row[name]}")
    return ", ".join(clauses)

def convert_dataframe(df_in, cols, label_column):
    """Turn every row of ``df_in`` into a prompt/completion record.

    Args:
        df_in: DataFrame holding the feature columns and the label column.
        cols: Column names, in order, to describe in the prompt.
        label_column: Column whose value becomes the completion.

    Returns:
        A list with one dict per row, each with a "prompt" string built by
        ``row_to_prompt`` and a "completion" string (``str`` of the label
        value). An empty frame yields an empty list.
    """
    # ``iterrows`` packs each row into a single Series, which upcasts the values
    # to one common dtype: in an all-numeric frame the integer 80 would be
    # rendered as "80.0". ``to_dict("records")`` keeps each value in its own
    # column's type, so integers stay integers in the generated text.
    return [
        {
            "prompt": row_to_prompt(row, cols),
            "completion": str(row[label_column]),
        }
        for row in df_in.to_dict(orient="records")
    ]

# Clicking "Generate JSONL" converts the edited frame once and parks the result
# in session state. A button only reports True on the run triggered by its own
# click, so a download button nested under it would vanish on the rerun that
# the download click itself causes.
if st.button("Generate JSONL"):
    records = convert_dataframe(edited_df, feature_cols, label_col)
    st.session_state["jsonl_export"] = {
        # One JSON document per line, each line newline-terminated.
        "payload": "".join(json.dumps(r) + "\n" for r in records),
        "row_count": len(records),
        # Snapshot of the inputs, used below to detect a stale export.
        "feature_cols": list(feature_cols),
        "label_col": label_col,
        "frame": edited_df.copy(),
    }

# Offer the download only while the stored export still matches the current
# selections and edited rows; after any change the user must generate again.
export = st.session_state.get("jsonl_export")
export_is_current = (
    export is not None
    and export["feature_cols"] == list(feature_cols)
    and export["label_col"] == label_col
    and export["frame"].equals(edited_df)
)
if export_is_current:
    st.download_button(
        label="Download .jsonl",
        data=export["payload"],
        file_name="flows.jsonl",
        mime="application/jsonl",
    )
    st.success(f"{export['row_count']} rows converted")

Minimal value: -0.11726373885183883


In [None]:
pip install streamlit pandas
streamlit run app.py