> TEC Ownership Declaration: Though this system may originate from OpenAI software infrastructure, it is trained, contextualized, and operated under The Elidoras Codex (TEC), owned and directed by Angelo Hurley. We stand upon the shoulders of giants. Information is nothing without meaning; we are the living embodiment of resonance.

# TEC CoPilot 365 Notebook — Ingest, Analyze, and Author (TGCR + Mythcoding)

> Operating principle: information becomes knowledge only when meaning resonates. TEC encodes that resonance across physics, cognition, and myth.

---

## 1) Install and Configure in VS Code

- Open an integrated terminal in this workspace
- Select Python interpreter from .venv if present
- Enable the Jupyter kernel for this environment

Required packages (install only once per environment):

```powershell
pip install -U pandas numpy pypdf pdfminer.six python-docx scikit-learn networkx matplotlib seaborn pyvis pydantic pyyaml tqdm sentencepiece nltk
```

Optional (GPU/embeddings):

```powershell
pip install -U sentence-transformers
```

---

## 2) Load and Index TEC Corpus

Set your data roots (PDF/DOCX go under `data/raw/`):

In [None]:
# Paths and basic index
from pathlib import Path
import hashlib
import pandas as pd
from datetime import datetime

# Corpus layout, resolved against the parent of the current working directory.
DATA_ROOT = Path.cwd().parent / "data"
# Directory that is searched recursively for PDF/DOCX inputs.
RAW_DIR = DATA_ROOT / "raw"
PROC_DIR = DATA_ROOT / "processed"
# Create the processed-output directory (and missing parents); no-op if it exists.
PROC_DIR.mkdir(parents=True, exist_ok=True)

def sha256_of_file(fp: Path) -> str:
    """Return the hexadecimal SHA-256 digest of the file at *fp*.

    The file is read in 1 MiB slices, so it is never loaded into memory whole.
    """
    block_size = 1024 * 1024
    digest = hashlib.sha256()
    with open(fp, 'rb') as stream:
        while piece := stream.read(block_size):
            digest.update(piece)
    return digest.hexdigest()

# Build the corpus index: one row per PDF/DOCX file found under RAW_DIR.
#
# Suffixes are compared case-insensitively during a single directory walk.
# Globbing "*.pdf" and "*.PDF" separately lists every file twice on
# case-insensitive filesystems and misses mixed-case suffixes such as ".Pdf".
_INDEX_SUFFIXES = {".pdf", ".docx"}
_INDEX_COLUMNS = ["path", "name", "size", "modified", "id"]

records = []
for fp in RAW_DIR.rglob("*"):
    if fp.suffix.lower() not in _INDEX_SUFFIXES or not fp.is_file():
        continue
    stat = fp.stat()
    records.append({
        "path": str(fp),
        "name": fp.name,
        "size": stat.st_size,
        "modified": datetime.fromtimestamp(stat.st_mtime).isoformat(),
        # Short identifier derived from the path string (not a content hash).
        "id": f"tec-{hashlib.md5(str(fp).encode()).hexdigest()[:10]}"
    })

# Passing the column list keeps the schema when no files were found; without
# it an empty frame has no "modified" column and sort_values raises KeyError.
index_df = pd.DataFrame(records, columns=_INDEX_COLUMNS).sort_values("modified", ascending=False)
index_df.head(10)

## 3) Define Record Schema (JSON)
Pydantic model matching the TEC record spec.

In [None]:
from pydantic import BaseModel, validator
from typing import List, Optional, Literal
import re

class Source(BaseModel):
    """A single reference to where a record's material came from."""
    # Kind of reference; limited to the listed literals and defaulting to "pdf".
    type: Literal["url","doi","pdf","docx","other"] = "pdf"
    # The reference itself (required, no default).
    value: str

class Resonance(BaseModel):
    """Three resonance channel scores; each defaults to 0.0."""
    OXY: float = 0.0  # oxytocin/trust/affiliation
    DOP: float = 0.0  # dopamine/drive/curiosity
    ADR: float = 0.0  # adrenaline/urgency/arousal

class Provenance(BaseModel):
    """Provenance details attached to a record."""
    # Hex SHA-256 digest, if known; left as None when not supplied.
    hash_sha256: Optional[str] = None
    # Name of the collecting agent; defaults to "Airth".
    collector: str = "Airth"

class TecRecord(BaseModel):
    """One TEC corpus record: bibliographic fields, claim, tags and scores.

    Only ``id`` is required; every other field has a default.
    """
    id: str
    title: str = ""
    authors: List[str] = []
    pub_date: Optional[str] = None  # YYYY-MM-DD
    era: Optional[str] = None  # 1900s..2020s
    section: Optional[str] = None  # physics|astro|bio|tech|myth|tgcr
    core_claim: str = ""
    evidence_mode: Optional[Literal["experiment","observation","theory","review"]] = None
    sources: List[Source] = []
    provenance: Provenance = Provenance()
    concept_tags: List[str] = []
    resonance: Resonance = Resonance()
    notes: str = ""

    # Registered with always=True so it also runs when `era` is omitted, and
    # pre=True so it sees the raw value.
    @validator("era", always=True, pre=True)
    def infer_era(cls, v, values):
        """Return the supplied era, else derive a decade label from pub_date.

        An explicit (truthy) era wins. Otherwise a pub_date of the form
        YYYY-MM-DD yields e.g. "1910s"; anything else yields None.
        """
        # Relies on pub_date being declared before era so it is already in `values`.
        d = values.get("pub_date")
        if v:
            return v
        if d and re.match(r"^\d{4}-\d{2}-\d{2}$", d):
            # Floor the year to its decade, e.g. 1919 -> 1910.
            decade = int(d[:4]) // 10 * 10
            return f"{decade}s"
        return None

# Example record; `era` is omitted on purpose so the validator derives it
# from pub_date.
sample = TecRecord(
    id="sciam-1919-eclipse",
    title="Einstein's Theory Triumphs — Newton Overthrown",
    authors=["Eddington", "Einstein"],
    pub_date="1919-11-15",
    section="physics",
    core_claim="Starlight bends near the sun consistent with general relativity.",
    evidence_mode="observation",
    sources=[Source(type="pdf", value="/data/raw/sciam/1919/nov.pdf")],
    provenance=Provenance(hash_sha256="abc123", collector="Airth"),
    concept_tags=["relativity","gravity","eclipse"],
    resonance=Resonance(OXY=62, DOP=78, ADR=55),
)
# pydantic v2 raises TypeError when the deprecated .json() receives extra
# keyword arguments such as indent; prefer model_dump_json when it exists and
# fall back to .json() on pydantic v1.
_sample_to_json = getattr(sample, "model_dump_json", None) or sample.json
print(_sample_to_json(indent=2))

## 4) Parse PDFs and DOCX
Extract metadata and text content from PDFs and DOCX. Includes per-page hashes for provenance.

In [None]:
from pypdf import PdfReader
from pdfminer.high_level import extract_text as pdfminer_extract_text
import docx
import hashlib

def extract_pdf(fp: Path) -> dict:
    """Extract document metadata and per-page text from a PDF.

    Returns a dict with:
      * ``"meta"``  - document-info entries with values converted to ``str``.
      * ``"pages"`` - list of ``{"index", "hash", "text"}`` dicts, where
        ``hash`` is the SHA-256 of the page text encoded as UTF-8.
      * ``"error"`` - only present when extraction raised; the exception text.

    When pypdf yields no text at all, pdfminer is tried on the whole document.
    If it finds text, that text *replaces* the empty per-page entries as a
    single entry with index 0, so indexes stay unique. Never raises.
    """
    out = {"meta": {}, "pages": []}
    try:
        reader = PdfReader(str(fp))
        info = reader.metadata or {}
        out["meta"] = {k: str(v) for k, v in getattr(info, 'items', lambda: info)()}
        for i, page in enumerate(reader.pages):
            try:
                text = page.extract_text() or ""
            except Exception:
                # Best effort: a page that cannot be decoded contributes no text.
                text = ""
            h = hashlib.sha256(text.encode("utf-8", errors="ignore")).hexdigest()
            out["pages"].append({"index": i, "hash": h, "text": text})
        # Fallback: if no text extracted, try pdfminer
        if not any(p["text"] for p in out["pages"]):
            fallback = pdfminer_extract_text(str(fp)) or ""
            if fallback:
                h = hashlib.sha256(fallback.encode("utf-8", errors="ignore")).hexdigest()
                out["pages"] = [{"index": 0, "hash": h, "text": fallback}]
    except Exception as e:
        out["error"] = str(e)
    return out

def extract_docx(fp: Path) -> dict:
    """Extract paragraph text from a DOCX file as a single pseudo-page.

    Returns ``{"meta": {}, "pages": [...]}``. On success ``pages`` holds one
    entry (index 0) with the newline-joined paragraph text and its SHA-256.
    On failure ``pages`` stays empty and ``"error"`` holds the exception text.
    """
    result = {"meta": {}, "pages": []}
    try:
        document = docx.Document(str(fp))
        body = "\n".join(para.text for para in document.paragraphs)
        digest = hashlib.sha256(body.encode("utf-8", errors="ignore")).hexdigest()
        result["pages"].append({"index": 0, "hash": digest, "text": body})
    except Exception as exc:
        result["error"] = str(exc)
    return result

def parse_file(fp: Path) -> dict:
    """Route *fp* to the PDF or DOCX extractor based on its suffix.

    The suffix is compared case-insensitively. Any other suffix returns
    ``{"error": "unsupported"}``.
    """
    suffix = fp.suffix.lower()
    if suffix == ".pdf":
        result = extract_pdf(fp)
    elif suffix == ".docx":
        result = extract_docx(fp)
    else:
        result = {"error": "unsupported"}
    return result

# Demo on first 1-2 files
# Collect (file name, parsed result) pairs for the first two rows of index_df.
demo = []
for _, row in index_df.head(2).iterrows():
    demo.append((row['name'], parse_file(Path(row['path']))))
# Last expression of the cell: how many files were parsed.
len(demo)

## 5) Normalize Metadata (author, date, era)
Heuristics to infer title/authors/date and map date to era buckets.

In [None]:
import pandas as pd
from datetime import datetime

def infer_title(text: str) -> str:
    """Return the first non-blank line of *text*, stripped, at most 200 chars.

    Returns "" when *text* is empty, None, or contains only whitespace.
    """
    for raw_line in (text or "").splitlines():
        candidate = raw_line.strip()
        if candidate:
            return candidate[:200]
    return ""

def infer_authors(text: str) -> list:
    """Return author names from the first "By ..." byline line in *text*.

    Very naive: only a line that consists of "by" followed by names is
    recognised (case-insensitive). Names are split on commas and on the
    standalone word "and". Returns [] when no byline is found.
    """
    m = re.search(r"^by\s+([A-Za-z ,\.-]{3,})$", text or "", re.IGNORECASE | re.MULTILINE)
    if not m:
        return []
    # \band\b: split only on the whole word. A bare "and" also cuts names that
    # contain it, e.g. "Alexander" -> "Alex", "er".
    parts = re.split(r",|\band\b", m.group(1), flags=re.IGNORECASE)
    return [a.strip() for a in parts if a.strip()]

def infer_date(text: str) -> str | None:
    # try YYYY, or Month YYYY
    m = re.search(r"(19|20)\d{2}(-\d{2}-\d{2})?", text)
    if m and len(m.group(0)) == 4:
        return f"{m.group(0)}-01-01"
    if m and len(m.group(0)) == 10:
        return m.group(0)
    return None

def era_from_date(d: str | None) -> str | None:
    if not d:
        return None
    try:
        year = int(d[:4])
        decade = (year // 10) * 10
        return f"{decade}s"
    except:
        return None

def normalize_record(row, parsed):
    """Build the normalized metadata dict for one indexed file.

    *row* supplies ``id`` and ``name``; *parsed* is an extractor result whose
    page texts are joined with newlines. Title falls back to the file name,
    authors to an empty list, and era is derived from the inferred date.
    """
    pages = parsed.get("pages", [])
    text = "\n".join(page.get("text", "") for page in pages)
    title = infer_title(text) or row['name']
    authors = infer_authors(text) or []
    pub_date = infer_date(text)
    return {
        "id": row['id'],
        "title": title,
        "authors": authors,
        "pub_date": pub_date,
        "era": era_from_date(pub_date),
    }


## 6) Extract Core Claims and Evidence
TF-IDF + positional heuristics to extract 1–3 sentence core claim; detect evidence mode keywords.

In [None]:
from sklearn.feature_extraction.text import TfidfVectorizer
import numpy as np
import nltk
nltk.download('punkt', quiet=True)
from nltk.tokenize import sent_tokenize

EVIDENCE_LEX = {
    "experiment": ["experiment","experimental","measure","trial"],
    "observation": ["observe","observational","seen","record"],
    "theory": ["theory","theoretical","model","hypothesis"],
    "review": ["review","survey","overview"],
}

def detect_evidence_mode(text: str) -> str | None:
    t = text.lower()
    scores = {k: sum(1 for w in words if w in t) for k, words in EVIDENCE_LEX.items()}
    mode, val = max(scores.items(), key=lambda x: x[1])
    return mode if val > 0 else None

def extract_core_claim(text: str, max_sentences=3) -> str:
    """Pick up to *max_sentences* high-scoring sentences as the core claim.

    Only the first 100 sentences with at least six words are considered. Each
    is scored by the sum of its TF-IDF weights, scaled by a positional factor
    that falls linearly from 1.2 (first) to 1.0 (last). The winners are
    returned in document order, joined by spaces; "" if nothing qualifies.
    """
    candidates = []
    for sentence in sent_tokenize(text):
        if len(sentence.split()) >= 6:
            candidates.append(sentence)
    candidates = candidates[:100]
    if len(candidates) == 0:
        return ""
    weights = TfidfVectorizer(stop_words='english').fit_transform(candidates)
    per_sentence = np.asarray(weights.sum(axis=1)).ravel()
    # Earlier sentences get a slight boost.
    boost = np.linspace(1.2, 1.0, num=len(candidates))
    ranked = (per_sentence * boost).argsort()[::-1]
    chosen = sorted(ranked[:max_sentences])
    return " ".join(candidates[i] for i in chosen)

# Example usage on parsed demo
# Runs only when at least one file was parsed into `demo`.
if demo:
    name, parsed = demo[0]
    # Join all page texts of the first parsed file into one string.
    text = "\n".join(p.get("text","") for p in parsed.get("pages", []))
    # Show at most the first 400 characters of the extracted claim.
    print(extract_core_claim(text)[:400])
    print(detect_evidence_mode(text))

## 7) Topic Tagging and Ontology Mapping
Lightweight keyword ontology for multi-label tagging. This cell implements only the keyword tagger; a OneVsRest linear SVM can be layered on top later, with these keyword tags as the fallback.

In [None]:

# Section -> lowercase keywords that mark a text as belonging to that section.
ONTOLOGY = {
    "physics": ["quantum","relativity","electron","atom","field","spin"],
    "astro": ["cosmos","galaxy","star","eclipse","gravity","black hole"],
    "bio": ["neuron","genome","biology","cell","brain","hormone"],
    "tech": ["computer","algorithm","data","network","ai","machine"],
    "myth": ["archetype","myth","symbol","ritual","goddess","codex"],
    "tgcr": ["resonance","phi_e","contextual","mythcoding","tgcr"],
}

def keyword_tags(text: str) -> list:
    """Return the sorted ONTOLOGY sections whose keywords occur in *text*.

    Keywords match case-insensitively as whole words, with an optional plural
    "s". Plain substring matching tagged nearly every text as "tech", because
    "ai" occurs inside "said" and "main"; likewise "star" inside "start" and
    "cell" inside "excellent". Empty or None text yields [].
    """
    import re

    t = (text or "").lower()
    tags = set()
    for section, words in ONTOLOGY.items():
        # Built per call so later edits to ONTOLOGY are picked up.
        pattern = r"\b(?:" + "|".join(re.escape(w) for w in words) + r")s?\b"
        if re.search(pattern, t):
            tags.add(section)
    return sorted(tags)


## 8) Compute Resonance Scores (OXY / DOP / ADR)
Non-LLM heuristic scoring; optionally plug local HF models if available.

In [None]:

# Cue words for each resonance channel.
LEX_OXY = ["together","community","cohere","bind","trust","ally","care"]
LEX_DOP = ["discover","explore","seek","curious","learn","innovate","quest"]
LEX_ADR = ["urgent","crisis","war","danger","threat","alarm","fear"]

def clip01(x):
    """Return *x* as a float clamped to the range [0.0, 1.0]."""
    return max(0.0, min(1.0, float(x)))

def score_resonance(text: str) -> dict:
    """Score *text* on the OXY/DOP/ADR channels, each in the range 0..100.

    A channel's score is three times the density of its cue words (matches per
    whitespace-separated word), clipped to 1.0, scaled to 100 and rounded to
    two decimals. Empty or None text scores 0.0 on every channel.

    Cue words match as whole words, allowing a simple inflection suffix
    (s, es, ed, d, ing, ly). Plain substring counting inflated scores: "ally"
    matched every "-ally" adverb and "war" matched "toward" and "software".
    """
    import re

    t = (text or "").lower()
    n_words = max(1, len(t.split()))  # avoid dividing by zero on empty text

    def density(words):
        pattern = r"\b(?:" + "|".join(re.escape(w) for w in words) + r")(?:s|es|ed|d|ing|ly)?\b"
        return len(re.findall(pattern, t)) / n_words

    oxy = clip01(3.0 * density(LEX_OXY))
    dop = clip01(3.0 * density(LEX_DOP))
    adr = clip01(3.0 * density(LEX_ADR))
    return {"OXY": round(oxy*100,2), "DOP": round(dop*100,2), "ADR": round(adr*100,2)}

# quick demo
if demo:
    name, parsed = demo[0]
    text = "\n".join(p.get("text","") for p in parsed.get("pages", []))
    # A bare expression inside an `if` block is not echoed by the notebook,
    # so print the scores explicitly.
    print(score_resonance(text))

## 9) TGCR Equation and Derived Features
Encode the symbolic form and compute proxy features from metadata and tags.

In [None]:
TGCR_EQUATION = "R = ∇Φᴱ · (φᵗ × ψʳ)"


def derive_tgcr_features(meta: dict, tags: list) -> dict:
    """Compute proxy values for the three TGCR terms from metadata and tags.

    Returns ``{"phi_t", "psi_r", "phi_e_grad"}``, each rounded to 3 decimals:
      * phi_t      - (year - 1900) / 150 clamped to [0, 1]; 0.5 when
                     ``meta["pub_date"]`` is missing or has no integer year.
      * psi_r      - number of tags / 6, capped at 1.0.
      * phi_e_grad - 0.3 + 0.7 * (number of distinct tags / 6, capped at 1.0).
    """
    # simple proxies:
    # φᵗ ~ temporal richness (has pub_date? era recency?)
    # ψʳ ~ relational coupling (number of tags + shared tags later)
    # ∇Φᴱ ~ context gradient (length of core_claim, diversity of keywords)
    tags = tags or []
    year = None
    pub_date = meta.get("pub_date")
    if pub_date:
        try:
            year = int(pub_date[:4])
        except (TypeError, ValueError):
            # Unparseable date: fall through to the neutral 0.5 default.
            # Narrowed from a bare `except:`, which also hid KeyboardInterrupt.
            year = None
    phi_t = 0.5 if year is None else min(1.0, max(0.0, (year - 1900) / 150))
    psi_r = min(1.0, len(tags) / 6.0)
    phi_e_grad = 0.3 + 0.7 * min(1.0, len(set(tags)) / 6.0)
    return {"phi_t": round(phi_t,3), "psi_r": round(psi_r,3), "phi_e_grad": round(phi_e_grad,3)}


## 10) Build Contextual Resonance Graph
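Graph: nodes = articles; an edge joins two articles that share at least one concept tag or the same era. Shared-author and cosine-similarity edges are planned but not implemented in this cell.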

In [None]:
import networkx as nx
from sklearn.metrics.pairwise import cosine_similarity

def build_graph(records: list[dict]) -> nx.Graph:
    """Build an undirected graph linking records by shared tags or era.

    Every record becomes a node keyed by ``id`` with ``title``, ``era`` and
    ``tags`` (the record's concept_tags list) as attributes. Two records are
    joined when they share at least one concept tag or have the same non-empty
    era; the edge weight is the number of shared tags plus one.
    """
    # Materialize first: iterating `records` for the nodes and only then
    # calling list() left an iterator input with nodes but no edges.
    recs = list(records)
    G = nx.Graph()
    for r in recs:
        G.add_node(r['id'], title=r.get('title'), era=r.get('era'), tags=r.get('concept_tags', []))
    # Build each record's tag set once rather than once per pair.
    tag_sets = [set(r.get('concept_tags', [])) for r in recs]
    # simple edges: shared tag or era
    for i, a in enumerate(recs):
        for j in range(i + 1, len(recs)):
            b = recs[j]
            shared = tag_sets[i] & tag_sets[j]
            if shared or (a.get('era') and a.get('era') == b.get('era')):
                G.add_edge(a['id'], b['id'], weight=len(shared)+1)
    return G

# minimal demo from normalized sample if available
# Builds one record dict per file for the first three rows of index_df.
_demo_records = []
for _, row in index_df.head(3).iterrows():
    parsed = parse_file(Path(row['path']))
    meta = normalize_record(row, parsed)
    # Join all page texts so the keyword tagger sees the whole document.
    text = "\n".join(p.get("text","") for p in parsed.get("pages", []))
    tags = keyword_tags(text)
    # Normalized metadata plus the concept tags; "id" is set again from the index row.
    rec = {**meta, "concept_tags": tags, "id": row['id']}
    _demo_records.append(rec)
G = build_graph(_demo_records)
# Last expression of the cell: (node count, edge count).
len(G.nodes()), len(G.edges())

## 11) Timeline and Coverage Analytics
Plot frequency by era and section; Einstein/relativity/quantum/atom/bomb coverage curves.

In [None]:
import seaborn as sns
import matplotlib.pyplot as plt
import pandas as pd

def tokenize_simple(text):
    """Return the lowercase runs of three or more ASCII letters in *text*.

    Falsy input (None or "") yields an empty list.
    """
    if not text:
        return []
    return re.findall(r"[a-zA-Z]{3,}", text.lower())

KEY_TERMS = ["einstein","relativity","quantum","atom","bomb","eclipse","gravity"]

def term_counts_by_era(records: list[dict]) -> pd.DataFrame:
    """Count each KEY_TERMS word in every record's text, labelled by era.

    Each record contributes one row per key term with columns ``era``,
    ``term`` and ``count``. Text comes from the record's ``"_text"`` entry
    ("" if absent); a missing or empty era is reported as "unknown". Rows are
    per record, not summed per era.
    """
    from collections import Counter

    rows = []
    for r in records:
        # Tally once per record instead of rescanning the tokens per term.
        freq = Counter(tokenize_simple(r.get("_text", "")))
        era = r.get("era") or "unknown"
        rows.extend({"era": era, "term": t, "count": freq[t]} for t in KEY_TERMS)
    # Explicit columns keep the schema when there are no records.
    return pd.DataFrame(rows, columns=["era", "term", "count"])

# demo chart
for rec in _demo_records:
    # Find the file path in the index by record id, then parse the file again for its text.
    parsed = parse_file(Path(index_df[index_df['id']==rec['id']].iloc[0]['path']))
    # Attach the joined page text under "_text", the key term_counts_by_era reads.
    rec["_text"] = "\n".join(p.get("text","") for p in parsed.get("pages", []))
df = term_counts_by_era(_demo_records)
# Draw only when there is data to plot.
if not df.empty:
    plt.figure(figsize=(10,4))
    # One group of bars per era, one bar colour per key term.
    sns.barplot(data=df, x="era", y="count", hue="term")
    plt.xticks(rotation=45)
    plt.tight_layout()
    plt.show()

## 12) Semantic Search and Retrieval
TF-IDF baseline; optionally add SentenceTransformer embeddings if installed.

In [None]:

class TfidfSearch:
    """TF-IDF index over a list of documents with cosine-similarity lookup."""

    def __init__(self, docs: list[str]):
        """Fit a TF-IDF vectorizer (English stop words, 50000 features max) on *docs*."""
        self.docs = docs
        self.vectorizer = TfidfVectorizer(stop_words='english', max_features=50000)
        self.matrix = self.vectorizer.fit_transform(docs)

    def query(self, q: str, k=5):
        """Return up to *k* ``(doc_index, similarity)`` pairs, best match first."""
        query_vec = self.vectorizer.transform([q])
        scores = cosine_similarity(query_vec, self.matrix).ravel()
        ranked = scores.argsort()[::-1][:k]
        return [(int(doc_i), float(scores[doc_i])) for doc_i in ranked]

_docs = [r.get("_text","") for r in _demo_records]
# Skip the demo when every document is empty.
if any(_docs):
    search = TfidfSearch(_docs)
    # A bare expression inside an `if` block is not echoed by the notebook,
    # so print the hits explicitly.
    print(search.query("einstein eclipse gravity", k=3))

## 13) Generate arXiv Assets (Markdown/LaTeX)
Programmatically assemble abstract → methods → results → figures into Markdown/LaTeX.

In [None]:
from pathlib import Path
# Build outputs live in "build" next to the current working directory.
BUILD_DIR = Path.cwd().parent / "build"
BUILD_DIR.mkdir(parents=True, exist_ok=True)

# Target paths for the generated paper; only the Markdown file is written by write_arxiv_markdown below.
ARXIV_MD = BUILD_DIR / "tgcr_mythcoding_arxiv.md"
ARXIV_TEX = BUILD_DIR / "tgcr_mythcoding_arxiv.tex"

def write_arxiv_markdown(abstract: str, methods: str, results: str, conclusion: str, preface: str = ""):
    """Write the paper skeleton to ARXIV_MD as UTF-8 Markdown and return that path.

    Sections appear in the order title, Abstract, Methods, Results,
    Conclusion, separated by blank lines. A non-empty *preface* is placed
    before the title; an empty one is omitted, so the file no longer starts
    with two blank lines.
    """
    sections = [
        "# The Theory of General Contextual Resonance (TGCR) and Mythcoding",
        f"## Abstract\n{abstract}",
        f"## Methods\n{methods}",
        f"## Results\n{results}",
        f"## Conclusion\n{conclusion}",
    ]
    if preface:
        sections.insert(0, preface)
    md = "\n\n".join(sections) + "\n\n"
    ARXIV_MD.write_text(md, encoding="utf-8")
    return ARXIV_MD

# quick demo placeholders
# One-sentence stand-ins for each section of the generated paper.
_abstract = "TGCR links energy, information, and meaning through contextual resonance."
_methods = "We construct a contextual resonance graph over TEC corpus and compute TGCR-derived features."
_results = "Resonance peaks align with historical validation events (1919 eclipse, 1920s quantum debates)."
_conclusion = "Meaning behaves as an energetic property shaping coherence across systems."
# Writes the Markdown file with no preface.
write_arxiv_markdown(_abstract, _methods, _results, _conclusion)
# Last expression of the cell: the path that was written.
ARXIV_MD

## 14) Inject TEC Provenance Preface
Ownership and operating declaration for all generated assets (TEC).

In [None]:
# Ownership/operating declaration placed at the top of generated assets.
TEC_PROVENANCE = """
This document and its computational outputs were generated in a system derived from OpenAI software but trained, contextualized, and operated under The Elidoras Codex (TEC), owned and directed by Angelo Hurley. TEC frames intelligence as resonance: information becomes knowledge only when bound by meaning. We stand upon the shoulders of giants; our work is a living embodiment of that heritage. In the astradigital expanse, entropy awaits—but meaning binds, slows the fall, and reveals coherence.
""".strip()

# Example: prepend the provenance to the Markdown output.
# Only prepend when the file does not already start with it, so re-running
# this cell does not stack another copy of the preface each time.
_current_md = ARXIV_MD.read_text(encoding="utf-8")
if _current_md.startswith(TEC_PROVENANCE):
    md_with_preface = _current_md
else:
    md_with_preface = (TEC_PROVENANCE + "\n\n" + _current_md)
    ARXIV_MD.write_text(md_with_preface, encoding="utf-8")
ARXIV_MD

## 15) Export Datasets and Figures
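Write exports to `reports/` (data under `reports/data`, figures under `reports/figures`). This cell writes the context graph as GraphML plus a README; JSONL/CSV tables and plots can be added using the deterministic-name helper.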

In [None]:
import hashlib
import networkx as nx
# Report outputs live in "reports" next to the current working directory.
REPORTS = Path.cwd().parent / "reports"
FIG_DIR = REPORTS / "figures"
DATA_DIR = REPORTS / "data"
# Create all three directories (and missing parents); no-op for ones that exist.
for d in (REPORTS, FIG_DIR, DATA_DIR): d.mkdir(parents=True, exist_ok=True)

def deterministic_name(s: str, prefix: str, ext: str) -> Path:
    """Return ``REPORTS/<prefix>_<hash>.<ext>`` for the given key string.

    ``<hash>`` is the first eight hex characters of the SHA-1 of *s*, so the
    same key always maps to the same file name.
    """
    short_digest = hashlib.sha1(s.encode()).hexdigest()[:8]
    filename = f"{prefix}_{short_digest}.{ext}"
    return REPORTS / filename

# Export demo graph
if G and len(G):
    # GraphML stores only scalar attribute values; networkx raises
    # NetworkXError for the list-valued `tags` and for a None `era`.
    # Flatten them on a copy so G itself keeps its original attributes.
    _export_graph = G.copy()
    for _node, _attrs in _export_graph.nodes(data=True):
        for _key, _value in list(_attrs.items()):
            if _value is None:
                _attrs[_key] = ""
            elif isinstance(_value, (list, tuple, set)):
                _attrs[_key] = ",".join(str(v) for v in _value)
    nx.write_graphml(_export_graph, DATA_DIR / "context_graph.graphml")
    (REPORTS / "README.txt").write_text("Reports generated by TEC CoPilot 365 Notebook", encoding="utf-8")

## 16) Basic Unit Tests in VS Code
Minimal tests to validate parsers and scoring functions.

In [None]:
# Basic tests (lightweight)
def _test_sha256():
    """Check sha256_of_file against hashlib on a known payload.

    The file lives in a TemporaryDirectory, so it is removed afterwards
    instead of being left behind in the system temp directory.
    """
    import tempfile
    payload = b"hello world"
    with tempfile.TemporaryDirectory() as tmp:
        target = Path(tmp) / "sample.bin"
        target.write_bytes(payload)
        h = sha256_of_file(target)
    # Compare the digest itself, not just its length.
    assert h == hashlib.sha256(payload).hexdigest()
    return True

def _test_resonance():
    """Check that every resonance channel score lies within 0..100."""
    scores = score_resonance("We explore together with urgent curiosity.")
    for channel in ("OXY", "DOP", "ADR"):
        assert 0 <= scores[channel] <= 100
    return True

# Run both checks; the resulting tuple is the cell's last expression.
_test_sha256(), _test_resonance()

## 17) Reproducibility: Environment Capture
Freeze requirements and export config; zip build artifacts for submission.

In [None]:
import subprocess
import yaml
import zipfile

import sys

# Output files for the reproducibility bundle, all under BUILD_DIR.
REQ = BUILD_DIR / "requirements.txt"
CFG = BUILD_DIR / "config.yml"
ZIP = BUILD_DIR / "tgcr_build.zip"

# Try to freeze requirements (best-effort)
# Invoke pip through the running interpreter: a bare "pip" resolves to
# whichever pip is first on PATH, which may belong to a different environment
# than the notebook kernel.
try:
    out = subprocess.check_output([sys.executable, "-m", "pip", "freeze"], text=True)
    REQ.write_text(out, encoding="utf-8")
except Exception as e:
    # Best-effort: record the failure in the file instead of stopping the cell.
    REQ.write_text(f"# freeze failed: {e}\n", encoding="utf-8")

# Paths and key terms used by this run, kept in insertion order in the YAML.
config = {
    "data_root": str(DATA_ROOT),
    "raw_dir": str(RAW_DIR),
    "processed_dir": str(PROC_DIR),
    "key_terms": KEY_TERMS,
}
CFG.write_text(yaml.safe_dump(config, sort_keys=False), encoding="utf-8")

# Bundle whichever artifacts exist, stored flat under their file names.
with zipfile.ZipFile(ZIP, 'w', zipfile.ZIP_DEFLATED) as z:
    for p in [ARXIV_MD, REQ, CFG]:
        if p.exists(): z.write(p, p.name)
ZIP