diff --git a/.air.toml b/.air.toml new file mode 100644 index 000000000..4f42b59ed --- /dev/null +++ b/.air.toml @@ -0,0 +1,20 @@ +root = "." +tmp_dir = "tmp" + +[build] +cmd = "go build -o ./tmp/main.exe ./src" +bin = "tmp\\main.exe" +include_ext = ["go"] +exclude_dir = ["tmp", "vendor", "testdata"] +exclude_regex = ["_test\\.go$"] +delay = 1000 + +[log] +time = true +main_only = false + +[screen] +clear_on_rebuild = true + +[misc] +clean_on_exit = true diff --git a/.dockerignore b/.dockerignore new file mode 100644 index 000000000..4a326c28b --- /dev/null +++ b/.dockerignore @@ -0,0 +1,8 @@ +.git +.env +.env.* +tmp/ +.obsidian/ +docs/ +*.md +!README.md diff --git a/.env.example b/.env.example index 3fd450fac..98888a43a 100644 --- a/.env.example +++ b/.env.example @@ -1,10 +1,10 @@ # App -APP_ENV=development -PORT=8080 +APP_ENV=development # development or production +PORT=8080 # API server port -# Turso -DATABASE_URL=libsql://your-db-name.turso.io -DATABASE_AUTH_TOKEN=your_auth_token +# Turso Database +DATABASE_URL=libsql://your-db-name.turso.io # Your Turso database URL +DATABASE_AUTH_TOKEN=your_auth_token # Your Turso auth token # Clerk CLERK_PUBLISHABLE_KEY=pk_test_... @@ -18,6 +18,7 @@ PADDLE_PUBLIC_KEY=your_public_key # Fly.io FLY_API_TOKEN=your_fly_token -# Optional: For development -DEBUG=true -LOG_LEVEL=debug +# Development +DEBUG=true # Enable debug logging +LOG_LEVEL=debug # debug, info, warn, or error +RUN_INTEGRATION_TESTS=true # Enable integration tests diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md new file mode 100644 index 000000000..38c3a2522 --- /dev/null +++ b/CONTRIBUTING.md @@ -0,0 +1,20 @@ +# Contributing to Cache Warmer + +## Development Setup + +1. Fork and clone the repository +2. Copy `.env.example` to `.env` and configure +3. Run tests with `go test ./... -v` + +## Testing + +- Unit tests: `go test ./...` +- Integration tests: Require Turso credentials in `.env` +- Test coverage: `go test ./... -cover` + +## Pull Request Process + +1. 
Update documentation +2. Add/update tests +3. Ensure all tests pass +4. Update the development plan if needed diff --git a/Dockerfile b/Dockerfile new file mode 100644 index 000000000..f62d91a62 --- /dev/null +++ b/Dockerfile @@ -0,0 +1,30 @@ +# Build stage +FROM golang:1.21-alpine AS builder + +WORKDIR /app + +# Copy go mod and sum files +COPY go.mod go.sum ./ + +# Download dependencies +RUN go mod download + +# Copy source code +COPY . . + +# Build the application +RUN CGO_ENABLED=0 GOOS=linux go build -o main ./src/main.go + +# Final stage +FROM alpine:latest + +WORKDIR /app + +# Copy binary from builder +COPY --from=builder /app/main . + +# Expose port +EXPOSE 8080 + +# Run the binary +CMD ["./main"] diff --git a/README.md b/README.md index 54bd28df3..0e2bc500e 100644 --- a/README.md +++ b/README.md @@ -10,20 +10,64 @@ Cache Warmer crawls your Webflow site after publishing, ensuring all pages are c 🚧 Currently in initial development +Core functionality implemented: + +- ✅ URL crawling with concurrent requests +- ✅ Database integration with Turso +- ✅ Basic error handling +- ✅ Test coverage for core components + +Next up: + +- 🔄 Rate limiting and retry logic +- 🔄 Fly.io deployment +- 🔄 Cache validation improvements + ## Tech Stack - Backend: Go - Database: Turso -- Hosting: Fly.io -- Auth: Clerk -- Payments: Paddle -- Frontend: Webflow +- Hosting: Fly.io (coming soon) +- Auth: Clerk (planned) +- Payments: Paddle (planned) +- Frontend: Webflow (planned) ## Local Development -[Coming soon - will include setup instructions] +### Prerequisites + +- Go 1.23 or later +- A Turso database account +- Git + +### Setup + +1. Clone the repository: + + ```bash + git clone https://github.com/teamharvey/cache-warmer.git + cd cache-warmer + ``` + +2. Set up environment: + + ```bash + cp .env.example .env + # Edit .env with your Turso credentials + ``` + +3. Install dependencies: + + ```bash + go mod download + ``` + +4. Run tests: + ```bash + go test ./... 
-v + ``` -## Project Structure +### Project Structure ## Environment Setup diff --git a/Planning/Development plan.md b/docs/planning/Development plan.md similarity index 55% rename from Planning/Development plan.md rename to docs/planning/Development plan.md index aa4c2bacf..b3b9faa89 100644 --- a/Planning/Development plan.md +++ b/docs/planning/Development plan.md @@ -1,50 +1,63 @@ -## Stage 0: Project Setup & Infrastructure (6-10 hrs) - -### Development Environment Setup (2-3 hrs) - -- [ ] Initialise GitHub repository -- [ ] Set up branch protection -- [ ] Create dev/prod branches -- [ ] Set up local development environment -- [ ] Add initial documentation -### Go Project Structure (2-3 hrs) - -- [ ] Initialize Go project -- [ ] Set up dependency management -- [ ] Create project structure -- [ ] Add basic configs -- [ ] Set up testing framework -### Environment Configuration (2-4 hrs) - -- [ ] Set up dev/prod environments -- [ ] Configure environment variables -- [ ] Set up secrets management -- [ ] Configure Fly.io for both environments -- [ ] Set up initial deployment workflow -## Stage 1: Core Setup & Basic Crawling (15-25 hrs) - -### Set up Go project with Fly.io deployment (3-5 hrs) - -- [ ] Initialize Go project structure and dependencies -- [ ] Set up basic API endpoints -- [ ] Configure Fly.io deployment settings -- [ ] Set up environment variables and configs -- [ ] Implement basic health checks and monitoring -### Implement basic crawler (using Go's Colly) (8-12 hrs) - -- [ ] Set up Colly crawler configuration -- [ ] Implement concurrent crawling logic +## Stage 0: Project Setup & Infrastructure (6-10 hrs) 🟡 + +### Development Environment Setup (2-3 hrs) ✅ + +- [x] Initialise GitHub repository +- [x] Set up branch protection +- [x] Create dev/prod branches +- [x] Set up local development environment +- [x] Add initial documentation + +### Go Project Structure (2-3 hrs) ✅ + +- [x] Initialize Go project +- [x] Set up dependency management +- [x] 
Create project structure +- [x] Add basic configs +- [x] Set up testing framework + +### Production Infrastructure Setup (2-4 hrs) ⚡Priority + +- [x] Set up dev/prod environments +- [x] Configure environment variables +- [x] Set up secrets management +- [ ] Configure Fly.io + - [ ] Set up Fly.io account and project + - [ ] Configure deployment settings + - [ ] Set up environment variables in Fly.io + - [ ] Create deployment workflow + - [ ] Add health check endpoint monitoring +- [ ] Test production deployment + +## Stage 1: Core Setup & Basic Crawling (15-25 hrs) 🟡 + +### Core API Implementation (3-5 hrs) + +- [x] Initialize Go project structure and dependencies +- [x] Set up basic API endpoints +- [x] Set up environment variables and configs +- [x] Implement basic health checks and monitoring + +### Enhance Crawler Results (8-12 hrs) ⚡Priority + +- [x] Set up Colly crawler configuration +- [x] Implement concurrent crawling logic +- [x] Add basic error handling - [ ] Add rate limiting and retry logic - [ ] Handle different response types/errors - [ ] Implement cache validation checks - [ ] Add performance metrics collection -### Set up Turso for storing results (4-8 hrs) -- [ ] Design database schema -- [ ] Set up Turso connection and config -- [ ] Implement data models and queries -- [ ] Add error handling and retries +### Set up Turso for storing results (4-8 hrs) 🟡 + +- [x] Design database schema +- [x] Set up Turso connection and config +- [x] Implement data models and queries +- [x] Add basic error handling +- [x] Set up integration tests +- [ ] Add retry logic - [ ] Set up basic data cleanup routines + ## Stage 2: Auth & User Management (10-16 hrs) ### Implement Clerk authentication (4-6 hrs) @@ -61,6 +74,7 @@ - [ ] Implement user profile storage - [ ] Add user preferences handling - [ ] Set up user data sync with Clerk + ### Set up basic usage tracking (4-6 hrs) - [ ] Implement usage counters @@ -68,6 +82,7 @@ - [ ] Set up usage reset schedule - [ 
] Implement usage notifications - [ ] Add basic reporting functions + ## Stage 3: Billing & Subscriptions (8-12 hrs) ### Implement Paddle integration (4-6 hrs) @@ -76,19 +91,22 @@ - [ ] Implement subscription webhooks - [ ] Add payment flow integration - [ ] Set up subscription plans -- [ ] Implement checkout process +- [ ] Implement checkout process + ### Connect subscription status to user accounts (2-3 hrs) - [ ] Link subscriptions to users - [ ] Handle subscription updates - [ ] Implement plan changes - [ ] Add subscription status checks + ### Add usage limits/tracking (2-3 hrs) - [ ] Implement plan-based limits - [ ] Add upgrade prompts - [ ] Set up usage warnings -- [ ] Implement grace period +- [ ] Implement grace period + ## Stage 4: Webflow Integration & Launch (8-16 hrs) ### Build Webflow frontend interface (4-8 hrs) @@ -98,6 +116,7 @@ - [ ] Implement results display - [ ] Add usage statistics display - [ ] Create settings interface + ### Connect to backend APIs (3-5 hrs) - [ ] Implement API calls @@ -105,18 +124,20 @@ - [ ] Set up response handling - [ ] Implement loading states - [ ] Add retry logic + ### Set up monitoring (GA) (1-3 hrs) - [ ] Configure GA tracking - [ ] Add custom events - [ ] Set up conversion tracking - [ ] Implement error tracking -- [ ] Create basic dashboards +- [ ] Create basic dashboards --- + ## Key Risk Areas: - [ ] Crawler edge cases and error handling - [ ] Auth integration complexity - [ ] Paddle webhook handling -- [ ] Webflow API limitations \ No newline at end of file +- [ ] Webflow API limitations diff --git a/Planning/Infrastructure.md b/docs/planning/Infrastructure.md similarity index 100% rename from Planning/Infrastructure.md rename to docs/planning/Infrastructure.md diff --git a/fly.toml b/fly.toml new file mode 100644 index 000000000..cc39dee06 --- /dev/null +++ b/fly.toml @@ -0,0 +1,25 @@ +app = "blue-banded-bee" +primary_region = "syd" # Sydney region, since we're in AU + +[build] + dockerfile = "Dockerfile" + 
+[env] + PORT = "8080" + APP_ENV = "production" + +[http_service] + internal_port = 8080 + force_https = true + auto_stop_machines = true + auto_start_machines = true + min_machines_running = 1 + processes = ["app"] + +[[http_service.checks]] + grace_period = "10s" + interval = "30s" + method = "GET" + path = "/health" + protocol = "http" + timeout = "5s" diff --git a/go.mod b/go.mod index 2a21b7cc8..f3271c06c 100644 --- a/go.mod +++ b/go.mod @@ -4,31 +4,43 @@ go 1.23.0 toolchain go1.23.8 +require ( + github.com/gocolly/colly/v2 v2.2.0 + github.com/joho/godotenv v1.5.1 + github.com/rs/zerolog v1.34.0 + github.com/tursodatabase/libsql-client-go v0.0.0-20240902231107-85af5b9d094d + modernc.org/sqlite v1.37.0 +) + require ( github.com/PuerkitoBio/goquery v1.10.2 // indirect github.com/andybalholm/cascadia v1.3.3 // indirect github.com/antchfx/htmlquery v1.3.4 // indirect github.com/antchfx/xmlquery v1.4.4 // indirect github.com/antchfx/xpath v1.3.3 // indirect + github.com/antlr4-go/antlr/v4 v4.13.0 // indirect github.com/bits-and-blooms/bitset v1.22.0 // indirect - github.com/go-chi/chi/v5 v5.2.1 // indirect - github.com/go-chi/cors v1.2.1 // indirect + github.com/coder/websocket v1.8.12 // indirect + github.com/dustin/go-humanize v1.0.1 // indirect github.com/gobwas/glob v0.2.3 // indirect - github.com/gocolly/colly/v2 v2.2.0 // indirect github.com/golang/groupcache v0.0.0-20241129210726-2c02b8208cf8 // indirect github.com/golang/protobuf v1.5.4 // indirect - github.com/joho/godotenv v1.5.1 // indirect + github.com/google/uuid v1.6.0 // indirect github.com/kennygrant/sanitize v1.2.4 // indirect github.com/mattn/go-colorable v0.1.13 // indirect - github.com/mattn/go-isatty v0.0.19 // indirect + github.com/mattn/go-isatty v0.0.20 // indirect + github.com/ncruces/go-strftime v0.1.9 // indirect github.com/nlnwa/whatwg-url v0.6.1 // indirect - github.com/rs/zerolog v1.34.0 // indirect + github.com/remyoudompheng/bigfft v0.0.0-20230129092748-24d4a6f8daec // indirect 
github.com/saintfish/chardet v0.0.0-20230101081208-5e3ef4b5456d // indirect github.com/temoto/robotstxt v1.1.2 // indirect - github.com/tursodatabase/libsql-client-go v0.0.0-20240902231107-85af5b9d094d // indirect + golang.org/x/exp v0.0.0-20250305212735-054e65f0b394 // indirect golang.org/x/net v0.37.0 // indirect golang.org/x/sys v0.31.0 // indirect golang.org/x/text v0.23.0 // indirect google.golang.org/appengine v1.6.8 // indirect google.golang.org/protobuf v1.36.6 // indirect + modernc.org/libc v1.62.1 // indirect + modernc.org/mathutil v1.7.1 // indirect + modernc.org/memory v1.9.1 // indirect ) diff --git a/go.sum b/go.sum index da890e1dd..2c89a35ff 100644 --- a/go.sum +++ b/go.sum @@ -8,19 +8,20 @@ github.com/antchfx/xmlquery v1.4.4 h1:mxMEkdYP3pjKSftxss4nUHfjBhnMk4imGoR96FRY2d github.com/antchfx/xmlquery v1.4.4/go.mod h1:AEPEEPYE9GnA2mj5Ur2L5Q5/2PycJ0N9Fusrx9b12fc= github.com/antchfx/xpath v1.3.3 h1:tmuPQa1Uye0Ym1Zn65vxPgfltWb/Lxu2jeqIGteJSRs= github.com/antchfx/xpath v1.3.3/go.mod h1:i54GszH55fYfBmoZXapTHN8T8tkcHfRgLyVwwqzXNcs= +github.com/antlr4-go/antlr/v4 v4.13.0 h1:lxCg3LAv+EUK6t1i0y1V6/SLeUi0eKEKdhQAlS8TVTI= +github.com/antlr4-go/antlr/v4 v4.13.0/go.mod h1:pfChB/xh/Unjila75QW7+VU4TSnWnnk9UTnmpPaOR2g= github.com/bits-and-blooms/bitset v1.20.0/go.mod h1:7hO7Gc7Pp1vODcmWvKMRA9BNmbv6a/7QIWpPxHddWR8= github.com/bits-and-blooms/bitset v1.22.0 h1:Tquv9S8+SGaS3EhyA+up3FXzmkhxPGjQQCkcs2uw7w4= github.com/bits-and-blooms/bitset v1.22.0/go.mod h1:7hO7Gc7Pp1vODcmWvKMRA9BNmbv6a/7QIWpPxHddWR8= +github.com/coder/websocket v1.8.12 h1:5bUXkEPPIbewrnkU8LTCLVaxi4N4J8ahufH2vlo4NAo= +github.com/coder/websocket v1.8.12/go.mod h1:LNVeNrXQZfe5qhS9ALED3uA+l5pPqvwXg3CKoDBB2gs= github.com/coreos/go-systemd/v22 v22.5.0/go.mod h1:Y58oyj3AT4RCenI/lSvhwexgC+NSVTIJ3seZv2GcEnc= +github.com/davecgh/go-spew v1.1.0 h1:ZDRjVQ15GmhC3fiQ8ni8+OwkZQO4DARzQgrnXU1Liz8= github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= -github.com/go-chi/chi/v5 v5.2.1 
h1:KOIHODQj58PmL80G2Eak4WdvUzjSJSm0vG72crDCqb8= -github.com/go-chi/chi/v5 v5.2.1/go.mod h1:L2yAIGWB3H+phAw1NxKwWM+7eUH/lU8pOMm5hHcoops= -github.com/go-chi/cors v1.2.1 h1:xEC8UT3Rlp2QuWNEr4Fs/c2EAGVKBwy/1vHx3bppil4= -github.com/go-chi/cors v1.2.1/go.mod h1:sSbTewc+6wYHBBCW7ytsFSn836hqM7JxpglAy2Vzc58= +github.com/dustin/go-humanize v1.0.1 h1:GzkhY7T5VNhEkwH0PVJgjz+fX1rhBrR7pRT3mDkpeCY= +github.com/dustin/go-humanize v1.0.1/go.mod h1:Mu1zIs6XwVuF/gI1OepvI0qD18qycQx+mFykh5fBlto= github.com/gobwas/glob v0.2.3 h1:A4xDbljILXROh+kObIiy5kIaPYD8e96x1tgBhUI5J+Y= github.com/gobwas/glob v0.2.3/go.mod h1:d3Ez4x06l9bZtSvzIay5+Yzi0fmZzPgnTbPcKjJAkT8= -github.com/gocolly/colly v1.2.0 h1:qRz9YAn8FIH0qzgNUw+HT9UN7wm1oF9OBAilwEWpyrI= -github.com/gocolly/colly v1.2.0/go.mod h1:Hof5T3ZswNVsOHYmba1u03W65HDWgpV5HifSuueE0EA= github.com/gocolly/colly/v2 v2.2.0 h1:FQGxcqvTdFAvOpMRhk52o20Qsf6KtRU5HSf0bITS38I= github.com/gocolly/colly/v2 v2.2.0/go.mod h1:YOQwv1ofoQOzJiELnkThDd6ObOfl6odUk2i6Czbx3Ws= github.com/godbus/dbus/v5 v5.0.4/go.mod h1:xhWf0FNVPg57R7Z0UbKHbJfkEywrmjJnf7w5xrFpKfA= @@ -32,7 +33,12 @@ github.com/golang/protobuf v1.5.2/go.mod h1:XVQd3VNwM+JqD3oG2Ue2ip4fOMUkwXdXDdiu github.com/golang/protobuf v1.5.4 h1:i7eJL8qZTpSEXOPTxNKhASYpMn+8e5Q6AdndVa1dWek= github.com/golang/protobuf v1.5.4/go.mod h1:lnTiLA8Wa4RWRcIUkrtSVa5nRhsEGBg48fD6rSs7xps= github.com/google/go-cmp v0.5.5/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= +github.com/google/go-cmp v0.6.0 h1:ofyhxvXcZhMsU5ulbFiLKl/XBFqE1GSq7atu8tAmTRI= github.com/google/go-cmp v0.6.0/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY= +github.com/google/pprof v0.0.0-20250317173921-a4b03ec1a45e h1:ijClszYn+mADRFY17kjQEVQ1XRhq2/JR1M3sGqeJoxs= +github.com/google/pprof v0.0.0-20250317173921-a4b03ec1a45e/go.mod h1:boTsfXsheKC2y+lKOCMpSfarhxDeIzfZG1jqGcPl3cA= +github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0= +github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= 
github.com/joho/godotenv v1.5.1 h1:7eLL/+HRGLY0ldzfGMeQkb7vMd0as4CfYvUVzLqw0N0= github.com/joho/godotenv v1.5.1/go.mod h1:f4LDr5Voq0i2e/R5DDNOoa2zzDfwtkZa6DnEwAbqwq4= github.com/kennygrant/sanitize v1.2.4 h1:gN25/otpP5vAsO2djbMhF/LQX6R7+O1TB4yv8NzpJ3o= @@ -40,18 +46,25 @@ github.com/kennygrant/sanitize v1.2.4/go.mod h1:LGsjYYtgxbetdg5owWB2mpgUL6e2nfw2 github.com/mattn/go-colorable v0.1.13 h1:fFA4WZxdEF4tXPZVKMLwD8oUnCTTo08duU7wxecdEvA= github.com/mattn/go-colorable v0.1.13/go.mod h1:7S9/ev0klgBDR4GtXTXX8a3vIGJpMovkB8vQcUbaXHg= github.com/mattn/go-isatty v0.0.16/go.mod h1:kYGgaQfpe5nmfYZH+SKPsOc2e4SrIfOl2e/yFXSvRLM= -github.com/mattn/go-isatty v0.0.19 h1:JITubQf0MOLdlGRuRq+jtsDlekdYPia9ZFsB8h/APPA= github.com/mattn/go-isatty v0.0.19/go.mod h1:W+V8PltTTMOvKvAeJH7IuucS94S2C6jfK/D7dTCTo3Y= +github.com/mattn/go-isatty v0.0.20 h1:xfD0iDuEKnDkl03q4limB+vH+GxLEtL/jb4xVJSWWEY= +github.com/mattn/go-isatty v0.0.20/go.mod h1:W+V8PltTTMOvKvAeJH7IuucS94S2C6jfK/D7dTCTo3Y= +github.com/ncruces/go-strftime v0.1.9 h1:bY0MQC28UADQmHmaF5dgpLmImcShSi2kHU9XLdhx/f4= +github.com/ncruces/go-strftime v0.1.9/go.mod h1:Fwc5htZGVVkseilnfgOVb9mKy6w1naJmn9CehxcKcls= github.com/nlnwa/whatwg-url v0.6.1 h1:Zlefa3aglQFHF/jku45VxbEJwPicDnOz64Ra3F7npqQ= github.com/nlnwa/whatwg-url v0.6.1/go.mod h1:x0FPXJzzOEieQtsBT/AKvbiBbQ46YlL6Xa7m02M1ECk= github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= +github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= +github.com/remyoudompheng/bigfft v0.0.0-20230129092748-24d4a6f8daec h1:W09IVJc94icq4NjY3clb7Lk8O1qJ8BdBEF8z0ibU0rE= +github.com/remyoudompheng/bigfft v0.0.0-20230129092748-24d4a6f8daec/go.mod h1:qqbHyh8v60DhA7CoWK5oRCqLrMHRGoxYCSS9EjAz6Eo= github.com/rs/xid v1.6.0/go.mod h1:7XoLgs4eV+QndskICGsho+ADou8ySMSjJKDIan90Nz0= github.com/rs/zerolog v1.34.0 h1:k43nTLIwcTVQAncfCw4KZ2VY6ukYoZaBPNOE8txlOeY= 
github.com/rs/zerolog v1.34.0/go.mod h1:bJsvje4Z08ROH4Nhs5iH600c3IkWhwp44iRc54W6wYQ= github.com/saintfish/chardet v0.0.0-20230101081208-5e3ef4b5456d h1:hrujxIzL1woJ7AwssoOcM/tq5JjjG2yYOc8odClEiXA= github.com/saintfish/chardet v0.0.0-20230101081208-5e3ef4b5456d/go.mod h1:uugorj2VCxiV1x+LzaIdVa9b4S4qGAcH6cbhh4qVxOU= github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= +github.com/stretchr/testify v1.3.0 h1:TivCn/peBQ7UY8ooIcPgZFpTNSz0Q2U6UrFlUfqbe0Q= github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI= github.com/temoto/robotstxt v1.1.2 h1:W2pOjSJ6SWvldyEuiFXNxz3xZ8aiWX5LbfDiOFd7Fxg= github.com/temoto/robotstxt v1.1.2/go.mod h1:+1AmkuG3IYkh1kv0d2qEB9Le88ehNO0zwOr3ujewlOo= @@ -65,11 +78,15 @@ golang.org/x/crypto v0.19.0/go.mod h1:Iy9bg/ha4yyC70EfRS8jz+B6ybOBKMaSxLj6P6oBDf golang.org/x/crypto v0.23.0/go.mod h1:CKFgDieR+mRhux2Lsu27y0fO304Db0wZe70UKqHu0v8= golang.org/x/crypto v0.31.0/go.mod h1:kDsLvtWBEx7MV9tJOj9bnXsPbxwJQ6csT/x4KIN4Ssk= golang.org/x/crypto v0.32.0/go.mod h1:ZnnJkOaASj8g0AjIduWNlq2NRxL0PlBrbKVyZ6V/Ugc= +golang.org/x/exp v0.0.0-20250305212735-054e65f0b394 h1:nDVHiLt8aIbd/VzvPWN6kSOPE7+F/fNFDSXLVYkE/Iw= +golang.org/x/exp v0.0.0-20250305212735-054e65f0b394/go.mod h1:sIifuuw/Yco/y6yb6+bDNfyeQ/MdPUy/hKEMYQV17cM= golang.org/x/mod v0.6.0-dev.0.20220419223038-86c51ed26bb4/go.mod h1:jJ57K6gSWd91VN4djpZkiMVwK6gcyfeH4XE8wZrZaV4= golang.org/x/mod v0.8.0/go.mod h1:iBbtSCu2XBx23ZKBPSOrRkjjQPZFPuis4dIYUhu/chs= golang.org/x/mod v0.12.0/go.mod h1:iBbtSCu2XBx23ZKBPSOrRkjjQPZFPuis4dIYUhu/chs= golang.org/x/mod v0.15.0/go.mod h1:hTbmBsO62+eylJbnUtE2MGJUyE7QWk4xUqPFrRgJ+7c= golang.org/x/mod v0.17.0/go.mod h1:hTbmBsO62+eylJbnUtE2MGJUyE7QWk4xUqPFrRgJ+7c= +golang.org/x/mod v0.24.0 h1:ZfthKaKaT4NrhGVZHO1/WDTwGES4De8KtWO0SIbNJMU= +golang.org/x/mod v0.24.0/go.mod h1:IXM97Txy2VM4PJ3gI61r1YEk/gAj6zAHN3AdZt6S9Ww= golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod 
h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= golang.org/x/net v0.0.0-20210226172049-e18ecbb05110/go.mod h1:m0MpNAwzfU5UDzcl9v0D8zg8gWTRqZa9RBIspLL5mdg= golang.org/x/net v0.0.0-20220722155237-a158d28d115b/go.mod h1:XRhObCWvk6IyKnWLug+ECip1KBveYUHfp+8e9klMJ9c= @@ -89,6 +106,8 @@ golang.org/x/sync v0.3.0/go.mod h1:FU7BRWz2tNW+3quACPkgCx/L+uEAv1htQ0V83Z9Rj+Y= golang.org/x/sync v0.6.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk= golang.org/x/sync v0.7.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk= golang.org/x/sync v0.10.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk= +golang.org/x/sync v0.12.0 h1:MHc5BpPuC30uJk597Ri8TV3CNZcTLu6B6z4lJy+g6Jw= +golang.org/x/sync v0.12.0/go.mod h1:1dzgHSNfp02xaA81J2MS99Qcpr2w7fw1gpm99rleRqA= golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20210615035016-665e8c7367d1/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= @@ -98,12 +117,12 @@ golang.org/x/sys v0.0.0-20220811171246-fbc7d0a398ab/go.mod h1:oPkhp1MJrh7nUepCBc golang.org/x/sys v0.5.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.8.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= -golang.org/x/sys v0.12.0 h1:CM0HF96J0hcLAwsHPJZjfdNzs0gftsLfgKt57wWHJ0o= golang.org/x/sys v0.12.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.17.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= golang.org/x/sys v0.20.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= golang.org/x/sys v0.28.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= golang.org/x/sys v0.29.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= +golang.org/x/sys v0.31.0 h1:ioabZlmFYtWhL+TRYpcnNlLwhyxaM9kWTDEmfnprqik= golang.org/x/sys v0.31.0/go.mod 
h1:BJP2sWEmIv4KK5OTEluFJCKSidICx8ciO85XgH3Ak8k= golang.org/x/telemetry v0.0.0-20240228155512-f48c80bd79b2/go.mod h1:TeRTkGYfJXctD9OcfyVLyj2J3IxLnKwHJR8f4D8a3YE= golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo= @@ -133,6 +152,8 @@ golang.org/x/tools v0.1.12/go.mod h1:hNGJHUnrk76NpqgfD5Aqm5Crs+Hm0VOH/i9J2+nxYbc golang.org/x/tools v0.6.0/go.mod h1:Xwgl3UAJ/d3gWutnCtw505GrjyAbvKui8lOU390QaIU= golang.org/x/tools v0.13.0/go.mod h1:HvlwmtVNQAhOuCjW7xxvovg8wbNq7LwfXh/k7wXUl58= golang.org/x/tools v0.21.1-0.20240508182429-e35e4ccd0d2d/go.mod h1:aiJjzUbINMkxbQROHiO6hDPo2LHcIPhhQsa9DLh0yGk= +golang.org/x/tools v0.31.0 h1:0EedkvKDbh+qistFTd0Bcwe/YLh4vHwWEkiI0toFIBU= +golang.org/x/tools v0.31.0/go.mod h1:naFTU+Cev749tSJRXJlna0T3WxKvb1kWEx15xA4SdmQ= golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= google.golang.org/appengine v1.6.8 h1:IhEN5q69dyKagZPYMSdIjS2HqprW324FRQZJcGqPAsM= @@ -141,3 +162,27 @@ google.golang.org/protobuf v1.26.0-rc.1/go.mod h1:jlhhOSvTdKEhbULTjvd4ARK9grFBp0 google.golang.org/protobuf v1.26.0/go.mod h1:9q0QmTI4eRPtz6boOQmLYwt+qCgq0jsYwAQnmE0givc= google.golang.org/protobuf v1.36.6 h1:z1NpPI8ku2WgiWnf+t9wTPsn6eP1L7ksHUlkfLvd9xY= google.golang.org/protobuf v1.36.6/go.mod h1:jduwjTPXsFjZGTmRluh+L6NjiWu7pchiJ2/5YcXBHnY= +modernc.org/cc/v4 v4.25.2 h1:T2oH7sZdGvTaie0BRNFbIYsabzCxUQg8nLqCdQ2i0ic= +modernc.org/cc/v4 v4.25.2/go.mod h1:uVtb5OGqUKpoLWhqwNQo/8LwvoiEBLvZXIQ/SmO6mL0= +modernc.org/ccgo/v4 v4.25.1 h1:TFSzPrAGmDsdnhT9X2UrcPMI3N/mJ9/X9ykKXwLhDsU= +modernc.org/ccgo/v4 v4.25.1/go.mod h1:njjuAYiPflywOOrm3B7kCB444ONP5pAVr8PIEoE0uDw= +modernc.org/fileutil v1.3.0 h1:gQ5SIzK3H9kdfai/5x41oQiKValumqNTDXMvKo62HvE= +modernc.org/fileutil v1.3.0/go.mod h1:XatxS8fZi3pS8/hKG2GH/ArUogfxjpEKs3Ku3aK4JyQ= +modernc.org/gc/v2 v2.6.5 
h1:nyqdV8q46KvTpZlsw66kWqwXRHdjIlJOhG6kxiV/9xI= +modernc.org/gc/v2 v2.6.5/go.mod h1:YgIahr1ypgfe7chRuJi2gD7DBQiKSLMPgBQe9oIiito= +modernc.org/libc v1.62.1 h1:s0+fv5E3FymN8eJVmnk0llBe6rOxCu/DEU+XygRbS8s= +modernc.org/libc v1.62.1/go.mod h1:iXhATfJQLjG3NWy56a6WVU73lWOcdYVxsvwCgoPljuo= +modernc.org/mathutil v1.7.1 h1:GCZVGXdaN8gTqB1Mf/usp1Y/hSqgI2vAGGP4jZMCxOU= +modernc.org/mathutil v1.7.1/go.mod h1:4p5IwJITfppl0G4sUEDtCr4DthTaT47/N3aT6MhfgJg= +modernc.org/memory v1.9.1 h1:V/Z1solwAVmMW1yttq3nDdZPJqV1rM05Ccq6KMSZ34g= +modernc.org/memory v1.9.1/go.mod h1:/JP4VbVC+K5sU2wZi9bHoq2MAkCnrt2r98UGeSK7Mjw= +modernc.org/opt v0.1.4 h1:2kNGMRiUjrp4LcaPuLY2PzUfqM/w9N23quVwhKt5Qm8= +modernc.org/opt v0.1.4/go.mod h1:03fq9lsNfvkYSfxrfUhZCWPk1lm4cq4N+Bh//bEtgns= +modernc.org/sortutil v1.2.1 h1:+xyoGf15mM3NMlPDnFqrteY07klSFxLElE2PVuWIJ7w= +modernc.org/sortutil v1.2.1/go.mod h1:7ZI3a3REbai7gzCLcotuw9AC4VZVpYMjDzETGsSMqJE= +modernc.org/sqlite v1.37.0 h1:s1TMe7T3Q3ovQiK2Ouz4Jwh7dw4ZDqbebSDTlSJdfjI= +modernc.org/sqlite v1.37.0/go.mod h1:5YiWv+YviqGMuGw4V+PNplcyaJ5v+vQd7TQOgkACoJM= +modernc.org/strutil v1.2.1 h1:UneZBkQA+DX2Rp35KcM69cSsNES9ly8mQWD71HKlOA0= +modernc.org/strutil v1.2.1/go.mod h1:EHkiggD70koQxjVdSBM3JKM7k6L0FbGE5eymy9i3B9A= +modernc.org/token v1.1.0 h1:Xl7Ap9dKaEs5kLoOQeQmPWevfnk/DM5qcLcYlA8ys6Y= +modernc.org/token v1.1.0/go.mod h1:UGzOrNV1mAFSEB63lOFHIpNRUVMvYTc6yu1SMY/XTDM= diff --git a/src/crawler/config.go b/src/crawler/config.go index 794158b98..aea6a4864 100644 --- a/src/crawler/config.go +++ b/src/crawler/config.go @@ -20,4 +20,4 @@ func DefaultConfig() *Config { RetryAttempts: 3, RetryDelay: 2 * time.Second, } -} \ No newline at end of file +} diff --git a/src/crawler/crawler.go b/src/crawler/crawler.go index e032ee9c0..c1bedc22c 100644 --- a/src/crawler/crawler.go +++ b/src/crawler/crawler.go @@ -2,6 +2,9 @@ package crawler import ( "context" + "database/sql" + "fmt" + "net/url" "time" "github.com/gocolly/colly/v2" @@ -36,20 +39,73 @@ func New(config *Config) *Crawler 
{ } } -func (c *Crawler) WarmURL(ctx context.Context, url string) (*CrawlResult, error) { +func (c *Crawler) WarmURL(ctx context.Context, targetURL string) (*CrawlResult, error) { start := time.Now() result := &CrawlResult{ - URL: url, + URL: targetURL, Timestamp: time.Now().Unix(), } - err := c.colly.Visit(url) + // Parse and validate URL + parsedURL, err := url.Parse(targetURL) if err != nil { - log.Error().Err(err).Str("url", url).Msg("Failed to crawl URL") result.Error = err.Error() return result, err } + // Additional validation + if parsedURL.Scheme == "" || parsedURL.Host == "" { + err := fmt.Errorf("invalid URL format: %s", targetURL) + result.Error = err.Error() + return result, err + } + + c.colly.OnResponse(func(r *colly.Response) { + result.StatusCode = r.StatusCode + result.CacheStatus = r.Headers.Get("CF-Cache-Status") + + // Treat non-2xx status codes as errors + if r.StatusCode < 200 || r.StatusCode >= 300 { + result.Error = fmt.Sprintf("HTTP %d: Non-successful status code", r.StatusCode) + } + }) + + c.colly.OnError(func(r *colly.Response, err error) { + if r != nil { + result.StatusCode = r.StatusCode + } + result.Error = err.Error() + }) + + err = c.colly.Visit(targetURL) + if err != nil { + log.Error().Err(err).Str("url", targetURL).Msg("Failed to crawl URL") + result.Error = err.Error() + return result, err + } + + c.colly.Wait() result.ResponseTime = time.Since(start).Milliseconds() + + // Return error if we got a non-2xx status code or any other error + if result.Error != "" { + return result, fmt.Errorf(result.Error) + } + return result, nil -} \ No newline at end of file +} + +func setupSchema(db *sql.DB) error { + _, err := db.Exec(` + CREATE TABLE IF NOT EXISTS crawl_results ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + url TEXT NOT NULL, + response_time INTEGER NOT NULL, + status_code INTEGER NOT NULL, + error TEXT NULL, -- Changed to allow NULL + cache_status TEXT NULL, -- Changed to allow NULL + created_at DATETIME DEFAULT 
CURRENT_TIMESTAMP + ) + `) + return err +} diff --git a/src/crawler/crawler_test.go b/src/crawler/crawler_test.go new file mode 100644 index 000000000..52cd2d846 --- /dev/null +++ b/src/crawler/crawler_test.go @@ -0,0 +1,77 @@ +package crawler + +import ( + "context" + "net/http" + "net/http/httptest" + "testing" +) + +func TestWarmURL(t *testing.T) { + // Create a test server + ts := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + w.Header().Set("CF-Cache-Status", "HIT") + w.WriteHeader(http.StatusOK) + w.Write([]byte("Hello, World!")) + })) + defer ts.Close() + + crawler := New(nil) + result, err := crawler.WarmURL(context.Background(), ts.URL) + if err != nil { + t.Errorf("Expected no error, got %v", err) + } + + if result.StatusCode != http.StatusOK { + t.Errorf("Expected status code %d, got %d", http.StatusOK, result.StatusCode) + } + + if result.CacheStatus != "HIT" { + t.Errorf("Expected cache status HIT, got %s", result.CacheStatus) + } +} + +func TestWarmURLError(t *testing.T) { + crawler := New(nil) + // Use a malformed URL instead + result, err := crawler.WarmURL(context.Background(), "not-a-valid-url") + + if err == nil { + t.Error("Expected error for invalid URL, got nil") + } + + if result.Error == "" { + t.Error("Expected error message in result, got empty string") + } +} + +func TestWarmURLWithDifferentStatuses(t *testing.T) { + tests := []struct { + name string + statusCode int + wantError bool + }{ + {"success", http.StatusOK, false}, + {"not found", http.StatusNotFound, true}, + {"server error", http.StatusInternalServerError, true}, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + ts := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + w.WriteHeader(tt.statusCode) + })) + defer ts.Close() + + crawler := New(nil) + result, err := crawler.WarmURL(context.Background(), ts.URL) + + if (err != nil) != tt.wantError { + t.Errorf("WarmURL() error = %v, 
wantError %v", err, tt.wantError) + } + if result.StatusCode != tt.statusCode { + t.Errorf("WarmURL() status = %v, want %v", result.StatusCode, tt.statusCode) + } + }) + } +} diff --git a/src/crawler/types.go b/src/crawler/types.go index 0d076cbea..54339c971 100644 --- a/src/crawler/types.go +++ b/src/crawler/types.go @@ -15,4 +15,4 @@ type CrawlOptions struct { RateLimit int Timeout int FollowLinks bool -} \ No newline at end of file +} diff --git a/src/db/db.go b/src/db/db.go index 152487f68..72bfa4e4d 100644 --- a/src/db/db.go +++ b/src/db/db.go @@ -1,9 +1,12 @@ package db import ( + "context" "database/sql" + "time" - "github.com/tursodatabase/libsql-client-go" + "github.com/rs/zerolog/log" + _ "github.com/tursodatabase/libsql-client-go/libsql" ) type DB struct { @@ -11,12 +14,22 @@ type DB struct { } type Config struct { - URL string + URL string AuthToken string } +type CrawlResult struct { + ID int64 `json:"id"` + URL string `json:"url"` + ResponseTime int64 `json:"response_time_ms"` + StatusCode int `json:"status_code"` + Error string `json:"error,omitempty"` + CacheStatus string `json:"cache_status,omitempty"` + CreatedAt time.Time `json:"created_at"` +} + func New(config *Config) (*DB, error) { - client, err := libsql.Open(config.URL, libsql.WithAuthToken(config.AuthToken)) + client, err := sql.Open("libsql", config.URL+"?authToken="+config.AuthToken) if err != nil { return nil, err } @@ -45,4 +58,68 @@ func setupSchema(db *sql.DB) error { ) `) return err -} \ No newline at end of file +} + +// StoreCrawlResult stores a new crawl result in the database +func (db *DB) StoreCrawlResult(ctx context.Context, result *CrawlResult) error { + _, err := db.client.ExecContext(ctx, ` + INSERT INTO crawl_results (url, response_time, status_code, error, cache_status) + VALUES (?, ?, ?, ?, ?) 
+ `, result.URL, result.ResponseTime, result.StatusCode, result.Error, result.CacheStatus) + + if err != nil { + log.Error().Err(err).Msg("Failed to store crawl result") + return err + } + + return nil +} + +// GetRecentResults retrieves the most recent crawl results +func (db *DB) GetRecentResults(ctx context.Context, limit int) ([]CrawlResult, error) { + rows, err := db.client.QueryContext(ctx, ` + SELECT id, url, response_time, status_code, error, cache_status, created_at + FROM crawl_results + ORDER BY created_at DESC + LIMIT ? + `, limit) + + if err != nil { + return nil, err + } + defer rows.Close() + + var results []CrawlResult + for rows.Next() { + var r CrawlResult + err := rows.Scan(&r.ID, &r.URL, &r.ResponseTime, &r.StatusCode, &r.Error, &r.CacheStatus, &r.CreatedAt) + if err != nil { + return nil, err + } + results = append(results, r) + } + + return results, nil +} + +// Close closes the database connection +func (db *DB) Close() error { + return db.client.Close() +} + +// TestConnection tests the database connection by inserting and querying a test record +func (db *DB) TestConnection() error { + _, err := db.client.Exec(` + INSERT INTO test_connection (created_at) + VALUES (CURRENT_TIMESTAMP) + `) + return err +} + +func (db *DB) ResetSchema() error { + _, err := db.client.Exec(`DROP TABLE IF EXISTS crawl_results`) + if err != nil { + return err + } + return setupSchema(db.client) +} diff --git a/src/db/db_test.go b/src/db/db_test.go new file mode 100644 index 000000000..6006b0b81 --- /dev/null +++ b/src/db/db_test.go @@ -0,0 +1,174 @@ +package db + +import ( + "context" + "fmt" + "os" + "path/filepath" + "testing" + + "github.com/joho/godotenv" + _ "modernc.org/sqlite" +) + +func init() { + // Find project root (where go.mod is) + projectRoot, _ := os.Getwd() + for { + if _, err := os.Stat(filepath.Join(projectRoot, "go.mod")); err == nil { + break + } + parent := filepath.Dir(projectRoot) + if parent == projectRoot { + break + } + projectRoot = 
parent + } + + // Load .env from project root + if err := godotenv.Load(filepath.Join(projectRoot, ".env")); err != nil { + fmt.Printf("Error loading .env file: %v\n", err) + } +} + +func TestStoreCrawlResult(t *testing.T) { + // Setup test database connection with different memory mode + dbConfig := &Config{ + URL: "file::memory:", + AuthToken: "", + } + + database, err := New(dbConfig) + if err != nil { + t.Fatalf("Failed to create test database: %v", err) + } + defer database.Close() + + // Test case + testResult := &CrawlResult{ + URL: "https://test.com", + ResponseTime: 100, + StatusCode: 200, + Error: "", + CacheStatus: "HIT", + } + + // Store the result + err = database.StoreCrawlResult(context.Background(), testResult) + if err != nil { + t.Errorf("Failed to store crawl result: %v", err) + } + + // Retrieve and verify + results, err := database.GetRecentResults(context.Background(), 1) + if err != nil { + t.Errorf("Failed to get recent results: %v", err) + } + + if len(results) != 1 { + t.Errorf("Expected 1 result, got %d", len(results)) + } + + if results[0].URL != testResult.URL { + t.Errorf("Expected URL %s, got %s", testResult.URL, results[0].URL) + } +} + +func TestGetRecentResults(t *testing.T) { + dbConfig := &Config{ + URL: "file::memory:", + AuthToken: "", + } + + database, err := New(dbConfig) + if err != nil { + t.Fatalf("Failed to create test database: %v", err) + } + defer database.Close() + + // Insert multiple test results + testResults := []CrawlResult{ + {URL: "https://test1.com", ResponseTime: 100, StatusCode: 200}, + {URL: "https://test2.com", ResponseTime: 200, StatusCode: 200}, + {URL: "https://test3.com", ResponseTime: 300, StatusCode: 404}, + } + + for _, result := range testResults { + err = database.StoreCrawlResult(context.Background(), &result) + if err != nil { + t.Fatalf("Failed to store test result: %v", err) + } + } + + // Test retrieving with limit + limit := 2 + results, err := database.GetRecentResults(context.Background(), 
limit) + if err != nil { + t.Errorf("Failed to get recent results: %v", err) + } + + if len(results) != limit { + t.Errorf("Expected %d results, got %d", limit, len(results)) + } +} + +func TestTursoConnection(t *testing.T) { + // Set test environment variables + t.Setenv("RUN_INTEGRATION_TESTS", "true") + t.Setenv("DATABASE_URL", os.Getenv("DATABASE_URL")) // Use existing if available + t.Setenv("DATABASE_AUTH_TOKEN", os.Getenv("DATABASE_AUTH_TOKEN")) // Use existing if available + + // Debug: Print environment variables + t.Logf("RUN_INTEGRATION_TESTS=%s", os.Getenv("RUN_INTEGRATION_TESTS")) + t.Logf("DATABASE_URL=%s", os.Getenv("DATABASE_URL")) + t.Logf("DATABASE_AUTH_TOKEN=%s", os.Getenv("DATABASE_AUTH_TOKEN")) + + // Skip if not in integration test mode + if os.Getenv("RUN_INTEGRATION_TESTS") != "true" { + t.Skip("Skipping integration test") + } + + // Use real Turso credentials + dbConfig := &Config{ + URL: os.Getenv("DATABASE_URL"), + AuthToken: os.Getenv("DATABASE_AUTH_TOKEN"), + } + + database, err := New(dbConfig) + if err != nil { + t.Fatalf("Failed to connect to Turso: %v", err) + } + defer database.Close() + + // Test actual database operations + err = database.TestConnection() + if err != nil { + t.Errorf("Turso connection test failed: %v", err) + } +} + +func TestNullHandling(t *testing.T) { + dbConfig := &Config{ + URL: "file::memory:", + AuthToken: "", + } + + database, err := New(dbConfig) + if err != nil { + t.Fatalf("Failed to create test database: %v", err) + } + defer database.Close() + + // Test with null fields + testResult := &CrawlResult{ + URL: "https://test.com", + ResponseTime: 100, + StatusCode: 200, + // Error and CacheStatus intentionally left empty + } + + err = database.StoreCrawlResult(context.Background(), testResult) + if err != nil { + t.Errorf("Failed to store result with null fields: %v", err) + } +} diff --git a/src/main.go b/src/main.go index 9aef40823..239f3a957 100644 --- a/src/main.go +++ b/src/main.go @@ -11,6 +11,7 @@ 
import ( "github.com/rs/zerolog" "github.com/rs/zerolog/log" "github.com/teamharvey/cache-warmer/src/crawler" + "github.com/teamharvey/cache-warmer/src/db" ) type Config struct { @@ -24,6 +25,7 @@ type Config struct { func setupLogging(config *Config) { // Set up pretty console logging for development if config.Env == "development" { + log.Logger = log.Output(zerolog.ConsoleWriter{Out: os.Stdout, TimeFormat: time.RFC3339}) } @@ -75,22 +77,111 @@ func main() { http.HandleFunc("/test-crawl", func(w http.ResponseWriter, r *http.Request) { url := r.URL.Query().Get("url") if url == "" { - url = "https://www.teamharvey.co" // default test URL + url = "https://www.teamharvey.co" // default test URL + } + + // Initialize database + dbConfig := &db.Config{ + URL: config.DatabaseURL, + AuthToken: config.AuthToken, } - crawler := crawler.New(nil) // use default config + database, err := db.New(dbConfig) + if err != nil { + log.Error().Err(err).Msg("Failed to connect to database") + http.Error(w, "Database connection failed", http.StatusInternalServerError) + return + } + defer database.Close() + + // Perform crawl + crawler := crawler.New(nil) // use default config result, err := crawler.WarmURL(r.Context(), url) - + if err != nil { log.Error().Err(err).Msg("Crawl failed") http.Error(w, err.Error(), http.StatusInternalServerError) return } + // Store result in database + crawlResult := &db.CrawlResult{ + URL: result.URL, + ResponseTime: result.ResponseTime, + StatusCode: result.StatusCode, + Error: result.Error, + CacheStatus: result.CacheStatus, + } + + if err := database.StoreCrawlResult(r.Context(), crawlResult); err != nil { + log.Error().Err(err).Msg("Failed to store crawl result") + http.Error(w, "Failed to store result", http.StatusInternalServerError) + return + } + w.Header().Set("Content-Type", "application/json") json.NewEncoder(w).Encode(result) }) + // Add endpoint to get recent crawls + http.HandleFunc("/recent-crawls", func(w http.ResponseWriter, r 
*http.Request) { + dbConfig := &db.Config{ + URL: config.DatabaseURL, + AuthToken: config.AuthToken, + } + + database, err := db.New(dbConfig) + if err != nil { + log.Error().Err(err).Msg("Failed to connect to database") + http.Error(w, "Database connection failed", http.StatusInternalServerError) + return + } + defer database.Close() + + results, err := database.GetRecentResults(r.Context(), 10) // Get last 10 results + if err != nil { + log.Error().Err(err).Msg("Failed to get recent results") + http.Error(w, "Failed to get results", http.StatusInternalServerError) + return + } + + w.Header().Set("Content-Type", "application/json") + json.NewEncoder(w).Encode(results) + }) + + // Reset database endpoint (development only) + http.HandleFunc("/reset-db", func(w http.ResponseWriter, r *http.Request) { + // Only allow in development mode + if config.Env != "development" { + http.Error(w, "Not allowed in production", http.StatusForbidden) + return + } + + dbConfig := &db.Config{ + URL: config.DatabaseURL, + AuthToken: config.AuthToken, + } + + database, err := db.New(dbConfig) + if err != nil { + log.Error().Err(err).Msg("Failed to connect to database") + http.Error(w, "Database connection failed", http.StatusInternalServerError) + return + } + defer database.Close() + + if err := database.ResetSchema(); err != nil { + log.Error().Err(err).Msg("Failed to reset database schema") + http.Error(w, "Failed to reset database", http.StatusInternalServerError) + return + } + + w.Header().Set("Content-Type", "application/json") + json.NewEncoder(w).Encode(map[string]string{ + "status": "Database schema reset successfully", + }) + }) + // Start server log.Info(). Str("port", config.Port). 
diff --git a/src/main_test.go b/src/main_test.go new file mode 100644 index 000000000..7630ed6bc --- /dev/null +++ b/src/main_test.go @@ -0,0 +1,43 @@ +package main + +import ( + "net/http" + "net/http/httptest" + "testing" +) + +func TestHealthEndpoint(t *testing.T) { + // Create a request to pass to our handler + req, err := http.NewRequest("GET", "/health", nil) + if err != nil { + t.Fatal(err) + } + + // Create a ResponseRecorder to record the response + rr := httptest.NewRecorder() + handler := http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + w.Write([]byte("OK")) + }) + + // Our handlers satisfy http.Handler, so we can call their ServeHTTP method + // directly and pass in our Request and ResponseRecorder + handler.ServeHTTP(rr, req) + + // Check the status code + if status := rr.Code; status != http.StatusOK { + t.Errorf("handler returned wrong status code: got %v want %v", + status, http.StatusOK) + } + + // Check the response body + expected := "OK" + if rr.Body.String() != expected { + t.Errorf("handler returned unexpected body: got %v want %v", + rr.Body.String(), expected) + } +} + +func TestTestCrawlEndpoint(t *testing.T) { + t.Skip("TODO: Implement after refactoring handlers") + // This test will be implemented after refactoring handlers to be testable +} diff --git a/tmp/main.exe b/tmp/main.exe new file mode 100644 index 000000000..fca0afda2 Binary files /dev/null and b/tmp/main.exe differ