Skip to content

Commit

Permalink
Log to disk (#81)
Browse files Browse the repository at this point in the history
  • Loading branch information
mariusandra committed May 6, 2024
1 parent 7ebda71 commit 0414400
Show file tree
Hide file tree
Showing 22 changed files with 1,582 additions and 54 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/pull-request-tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@ jobs:
- uses: jiro4989/setup-nim-action@v1
with:
nim-version: '2.0.2'
nim-version: '2.0.4'
repo-token: ${{ secrets.GITHUB_TOKEN }}

- name: FrameOS nim tests
Expand Down
2 changes: 1 addition & 1 deletion Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ RUN apt-get update && \
apt-get install -y curl xz-utils gcc openssl ca-certificates git # &&

RUN mkdir -p /opt/nim && \
curl -L https://nim-lang.org/download/nim-2.0.2.tar.xz | tar -xJf - -C /opt/nim --strip-components=1 && \
curl -L https://nim-lang.org/download/nim-2.0.4.tar.xz | tar -xJf - -C /opt/nim --strip-components=1 && \
cd /opt/nim && \
sh build.sh && \
bin/nim c koch && \
Expand Down
2 changes: 1 addition & 1 deletion backend/app/api/frames.py
Original file line number Diff line number Diff line change
Expand Up @@ -172,7 +172,7 @@ def api_frame_deploy_event(id: int):
def api_frame_update(id: int):
frame = Frame.query.get_or_404(id)
fields = ['scenes', 'name', 'frame_host', 'frame_port', 'frame_access_key', 'frame_access', 'ssh_user', 'ssh_pass', 'ssh_port', 'server_host',
'server_port', 'server_api_key', 'width', 'height', 'rotate', 'color', 'interval', 'metrics_interval',
'server_port', 'server_api_key', 'width', 'height', 'rotate', 'color', 'interval', 'metrics_interval', 'log_to_file',
'scaling_mode', 'device', 'debug']
defaults = {'frame_port': 8787, 'ssh_port': 22}
try:
Expand Down
4 changes: 4 additions & 0 deletions backend/app/models/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@ class Frame(db.Model):
metrics_interval = db.Column(db.Double, default=60)
scaling_mode = db.Column(db.String(64), nullable=True) # contain (default), cover, stretch, center
rotate = db.Column(db.Integer, nullable=True)
log_to_file = db.Column(db.String(256), nullable=True)
debug = db.Column(db.Boolean, nullable=True)
last_log_at = db.Column(db.DateTime, nullable=True)
# apps
Expand Down Expand Up @@ -75,6 +76,7 @@ def to_dict(self):
'debug': self.debug,
'scenes': self.scenes,
'last_log_at': self.last_log_at.replace(tzinfo=timezone.utc).isoformat() if self.last_log_at else None,
'log_to_file': self.log_to_file,
}

def new_frame(name: str, frame_host: str, server_host: str, device: Optional[str] = None, interval: Optional[float] = None) -> Frame:
Expand Down Expand Up @@ -120,6 +122,7 @@ def new_frame(name: str, frame_host: str, server_host: str, device: Optional[str
rotate=0,
background_color="#ffffff",
device=device or "web_only",
log_to_file=None, # save the SSD by default
)
db.session.add(frame)
db.session.commit()
Expand Down Expand Up @@ -177,6 +180,7 @@ def get_frame_json(frame: Frame) -> dict:
"debug": frame.debug or False,
"scalingMode": frame.scaling_mode or "contain",
"rotate": frame.rotate or 0,
"logToFile": frame.log_to_file,
}

setting_keys = set()
Expand Down
23 changes: 21 additions & 2 deletions backend/app/tasks/deploy_frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,15 @@ def install_if_necessary(package: str, raise_on_error = True) -> int:
else:
cpu = "amd64"

total_memory = 0
try:
mem_output = []
exec_command(frame, ssh, "free -m", mem_output)
total_memory = int(mem_output[1].split()[1])
except Exception as e:
log(id, "stderr", str(e))
low_memory = total_memory < 512

drivers = drivers_for_device(frame.device)

# create a build .tar.gz
Expand All @@ -76,6 +85,10 @@ def install_if_necessary(package: str, raise_on_error = True) -> int:
log(id, "stdout", "- Creating build archive")
archive_path = create_local_build_archive(frame, build_dir, build_id, nim_path, source_dir, temp_dir, cpu)

if low_memory:
log(id, "stdout", "- Low memory detected, stopping FrameOS for compilation")
exec_command(frame, ssh, "sudo service frameos stop")

with SCPClient(ssh.get_transport()) as scp:
# build the release on the server
install_if_necessary("ntp")
Expand All @@ -100,11 +113,11 @@ def install_if_necessary(package: str, raise_on_error = True) -> int:
exec_command(frame, ssh, command)

exec_command(frame, ssh, "if [ ! -d /srv/frameos/ ]; then sudo mkdir -p /srv/frameos/ && sudo chown $(whoami):$(whoami) /srv/frameos/; fi")
exec_command(frame, ssh, "mkdir -p /srv/frameos/build/")
exec_command(frame, ssh, "mkdir -p /srv/frameos/build/ /srv/frameos/logs/")
log(id, "stdout", f"> add /srv/frameos/build/build_{build_id}.tar.gz")
scp.put(archive_path, f"/srv/frameos/build/build_{build_id}.tar.gz")
exec_command(frame, ssh, f"cd /srv/frameos/build && tar -xzf build_{build_id}.tar.gz && rm build_{build_id}.tar.gz")
exec_command(frame, ssh, f"cd /srv/frameos/build/build_{build_id} && PARALLEL_MEM=$(awk '/MemTotal/{{printf \"%.0f\\n\", $2/1024/150}}' /proc/meminfo) && PARALLEL=$(($PARALLEL_MEM < $(nproc) ? $PARALLEL_MEM : $(nproc))) && make -j$PARALLEL")
exec_command(frame, ssh, f"cd /srv/frameos/build/build_{build_id} && PARALLEL_MEM=$(awk '/MemTotal/{{printf \"%.0f\\n\", $2/1024/250}}' /proc/meminfo) && PARALLEL=$(($PARALLEL_MEM < $(nproc) ? $PARALLEL_MEM : $(nproc))) && make -j$PARALLEL")
exec_command(frame, ssh, f"mkdir -p /srv/frameos/releases/release_{build_id}")
exec_command(frame, ssh, f"cp /srv/frameos/build/build_{build_id}/frameos /srv/frameos/releases/release_{build_id}/frameos")
log(id, "stdout", f"> add /srv/frameos/releases/release_{build_id}/frame.json")
Expand Down Expand Up @@ -152,6 +165,12 @@ def install_if_necessary(package: str, raise_on_error = True) -> int:
elif drivers.get("noSpi"):
exec_command(frame, ssh, 'sudo raspi-config nonint do_spi 1')

if low_memory:
# disable apt-daily-upgrade (sudden +70mb memory usage, might lead a Zero W 2 to endlessly swap)
exec_command(frame, ssh, "sudo systemctl disable apt-daily.service apt-daily.timer apt-daily-upgrade.timer apt-daily-upgrade.service")
# # disable swap while we're at it
# exec_command(frame, ssh, "sudo systemctl disable dphys-swapfile.service")

# restart
exec_command(frame, ssh, "sudo systemctl daemon-reload")
exec_command(frame, ssh, "sudo systemctl enable frameos.service")
Expand Down
30 changes: 30 additions & 0 deletions backend/migrations/versions/1e2acb9652e8_log_to_file.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
"""log to file
Revision ID: 1e2acb9652e8
Revises: b29d2204547c
Create Date: 2024-04-26 01:13:40.516170
"""
from alembic import op
import sqlalchemy as sa


# revision identifiers, used by Alembic.
revision = '1e2acb9652e8'
down_revision = 'b29d2204547c'
branch_labels = None
depends_on = None


def upgrade():
# ### commands auto generated by Alembic - please adjust! ###
with op.batch_alter_table('frame', schema=None) as batch_op:
batch_op.add_column(sa.Column('log_to_file', sa.String(length=256), nullable=True))
# ### end Alembic commands ###


def downgrade():
# ### commands auto generated by Alembic - please adjust! ###
with op.batch_alter_table('frame', schema=None) as batch_op:
batch_op.drop_column('log_to_file')
# ### end Alembic commands ###
2 changes: 1 addition & 1 deletion frameos/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ run: build
./build/frameos --verbose

build:
nimble build -y --verbose
nimble build -y --verbose --lineTrace:on

test:
nimble test
2 changes: 2 additions & 0 deletions frameos/frame.json
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,9 @@
"metricsInterval": 60.0,
"scalingMode": "cover",
"frameAccess": "public",
"logToFile": "/tmp/frameos.log",
"rotate": 0,
"debug": true,
"settings": {
"sentry": {
"frame_dsn": null
Expand Down
3 changes: 2 additions & 1 deletion frameos/src/apps/haSensor/app.nim
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import json, strformat, httpclient, options, times
import json, strformat, options, times
import lib/httpclient
import frameos/types

type
Expand Down
2 changes: 1 addition & 1 deletion frameos/src/apps/openai/app.nim
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ import times
import options
import json
import strformat
import httpclient
import lib/httpclient
from frameos/utils/image import scaleAndDrawImage
import frameos/types

Expand Down
2 changes: 1 addition & 1 deletion frameos/src/apps/openaiText/app.nim
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ import times
import options
import json
import strformat
import httpclient
import lib/httpclient
from frameos/utils/image import scaleAndDrawImage
import frameos/types

Expand Down
1 change: 1 addition & 0 deletions frameos/src/frameos/config.nim
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@ proc loadConfig*(filename: string = "frame.json"): FrameConfig =
rotate: data{"rotate"}.getInt(),
scalingMode: data{"scalingMode"}.getStr(),
settings: data{"settings"},
logToFile: data{"logToFile"}.getStr(),
debug: data{"debug"}.getBool() or commandLineParams().contains("--debug")
)
setConfigDefaults(result)
Expand Down
1 change: 1 addition & 0 deletions frameos/src/frameos/frameos.nim
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@ proc start*(self: FrameOS) {.async.} =
"metricsInterval": self.frameConfig.metricsInterval,
"scalingMode": self.frameConfig.scalingMode,
"rotate": self.frameConfig.rotate,
"logToFile": self.frameConfig.logToFile,
"debug": self.frameConfig.debug,
}}
self.logger.log(message)
Expand Down
116 changes: 74 additions & 42 deletions frameos/src/frameos/logger.nim
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import httpclient, zippy, json, sequtils, os, times, math
import zippy, json, sequtils, os, times, math, strutils, net
import lib/httpclient

import frameos/channels
import frameos/types
Expand All @@ -16,57 +17,87 @@ const LOG_FLUSH_SECONDS = 1.0

var threadInitDone = false
var thread: Thread[FrameConfig]
var logFile: File

proc logToFile(filename: string, logJson: JsonNode) =
if filename.len > 0:
try:
let file = if "{date}" in filename:
filename.replace("{date}", now().format("yyyyMMdd"))
else:
filename
logFile = open(file, fmAppend)
logFile.write(now().format("[yyyy-MM-dd'T'HH:mm:ss]") & " " & $logJson & "\n")
logFile.close()
except Exception as e:
echo "Error writing to log file: " & $e.msg

proc processQueue(self: LoggerThread): int =
let logCount = (self.logs.len + self.erroredLogs.len)
if logCount > 10 or (logCount > 0 and self.lastSendAt + LOG_FLUSH_SECONDS < epochTime()):
# make a copy, just in case some thread from somewhere adds new entries
var newLogs = self.erroredLogs.concat(self.logs)
self.logs = @[]
self.erroredLogs = @[]

if newLogs.len == 0:
return 0

if newLogs.len > 100:
newLogs = newLogs[(newLogs.len - 100) .. (newLogs.len - 1)]

var client = newHttpClient(timeout = 1000)
try:
client.headers = newHttpHeaders([
("Authorization", "Bearer " & self.frameConfig.serverApiKey),
("Content-Type", "application/json"),
("Content-Encoding", "gzip")
])
let body = %*{"logs": newLogs}
let response = client.request(self.url, httpMethod = HttpPost, body = compress($body))
self.lastSendAt = epochTime()
if response.code == Http200:
if self.erroredLogs.len > 0:
self.erroredLogs = @[]
else:
echo "Error sending logs: HTTP " & $response.status
logToFile(self.frameConfig.logToFile, %*{"error": "Error sending logs", "status": response.status})
self.erroredLogs = newLogs
except CatchableError as e:
echo "Error sending logs: " & $e.msg
logToFile(self.frameConfig.logToFile, %*{"error": "Error sending logs", "message": e.msg})
self.erroredLogs = newLogs
finally:
client.close()

if self.erroredLogs.len > 0:
return -self.erroredLogs.len
else:
return newLogs.len


proc run(self: LoggerThread) =
var attempt = 0
while true:
let logCount = (self.logs.len + self.erroredLogs.len)
if logCount > 10 or (logCount > 0 and self.lastSendAt + LOG_FLUSH_SECONDS < epochTime()):
# make a copy, just in case some thread from somewhere adds new entries
var newLogs = self.erroredLogs.concat(self.logs)
self.logs = @[]
self.erroredLogs = @[]

if newLogs.len == 0:
# how did we even get here?
sleep(100)
continue

if newLogs.len > 100:
newLogs = newLogs[(newLogs.len - 100) .. (newLogs.len - 1)]

var client = newHttpClient(timeout = 10000)
try:
client.headers = newHttpHeaders([
("Authorization", "Bearer " & self.frameConfig.serverApiKey),
("Content-Type", "application/json"),
("Content-Encoding", "gzip")
])
let body = %*{"logs": newLogs}
let response = client.request(self.url, httpMethod = HttpPost, body = compress($body))
self.lastSendAt = epochTime()
if response.code != Http200:
echo "Error sending logs: HTTP " & $response.status
self.erroredLogs = newLogs
except CatchableError as e:
echo "Error sending logs: " & $e.msg
self.erroredLogs = newLogs
finally:
client.close()

if self.erroredLogs.len > 0:
attempt += 1
let sleepDuration = min(100 * (2 ^ attempt), 7500)
echo "Sleeping for " & $sleepDuration & "ms, attempt " & $attempt & ". Logs queued: " & $self.erroredLogs.len
sleep(sleepDuration)
else:
attempt = 0
let processedLogs = self.processQueue()
if processedLogs == 0:
sleep(100)
elif processedLogs < 0:
attempt += 1
let sleepDuration = min(100 * (2 ^ attempt), 7500)
echo "Sleeping for " & $sleepDuration & "ms, attempt " & $attempt & ". Logs queued: " & $self.erroredLogs.len
logToFile(self.frameConfig.logToFile, %*{"sleep": "Error sending logs", "duration": sleepDuration,
"attempt": attempt, "queued": self.erroredLogs.len})
sleep(sleepDuration)
else:
attempt = 0

let (success, payload) = logChannel.tryRecv()
if success:
if self.frameConfig.debug:
echo payload
self.logs.add(payload)
logToFile(self.frameConfig.logToFile, payload)
else:
sleep(100)

Expand All @@ -85,6 +116,7 @@ proc createThreadRunner(frameConfig: FrameConfig) {.thread.} =
run(loggerThread)
except Exception as e:
echo "Error in logger thread: " & $e.msg
logToFile(frameConfig.logToFile, %*{"error": "Error in logger thread", "message": $e.msg})
sleep(1000)

proc newLogger*(frameConfig: FrameConfig): Logger =
Expand Down
12 changes: 10 additions & 2 deletions frameos/src/frameos/metrics.nim
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
import json, os, psutil, strutils, sequtils
import json, os, psutil, strutils, sequtils, posix
import frameos/types
import frameos/channels

Expand Down Expand Up @@ -36,13 +36,21 @@ proc getMemoryUsage(self: MetricsLoggerThread): JsonNode =
proc getCPUUsage(self: MetricsLoggerThread): float =
result = psutil.cpuPercent(interval = 1)

proc getOpenFileDescriptors(self: MetricsLoggerThread): int =
var fdCount = 0
let dir = "/proc/" & $getpid() & "/fd"
for _ in walkDir(dir):
inc(fdCount)
return fdCount

proc logMetrics(self: MetricsLoggerThread) =
log(%*{
"event": "metrics",
"load": self.getLoadAverage(),
"cpuTemperature": self.getCPUTemperature(),
"memoryUsage": self.getMemoryUsage(),
"cpuUsage": self.getCPUUsage()
"cpuUsage": self.getCPUUsage(),
"openFileDescriptors": self.getOpenFileDescriptors(),
})

proc start(self: MetricsLoggerThread) =
Expand Down
2 changes: 1 addition & 1 deletion frameos/src/frameos/tests/test_config.nim
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ block test_load_config:
doAssert config.device == "web_only"
doAssert config.metrics_interval == 60 # 60.0 in frame.json
doAssert config.rotate == 0
doAssert config.debug == false
doAssert config.debug == true
doAssert config.scalingMode == "cover"
doAssert config.settings == %*{"sentry": {"frame_dsn": nil}}
doAssert config.settings{"sentry"} == %*{"frame_dsn": nil}
Expand Down
1 change: 1 addition & 0 deletions frameos/src/frameos/types.nim
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ type
rotate*: int
scalingMode*: string
settings*: JsonNode
logToFile*: string
debug*: bool

Logger* = ref object
Expand Down
Loading

0 comments on commit 0414400

Please sign in to comment.