@@ -6,52 +6,180 @@ import TidbitsOCM._
// Currently this only translates between data widths.
// It can be extended if needed.
// NOTE: endianness is not handled and will break the word ordering.
class InputTranslater(input_width: Int, output_width: Int) extends Module{
// Only works if output_width > input_width.
// Admittedly, this implementation is not pretty.
class InputTranslator(input_width: Int, output_width: Int) extends Module{


// Adapted from Rosetta Code.

// Greatest common divisor of a and b (Euclid's algorithm).
// The result is always non-negative; gcd(0, 0) is 0.
def gcd(a: Int, b: Int): Int = if (b == 0) a.abs else gcd(b, a % b)

// Least common multiple of a and b.
// Returns 0 when either argument is 0 (instead of dividing by gcd == 0).
// Divides by the gcd before multiplying so the intermediate value cannot
// overflow Int when the final result itself fits in an Int.
def lcm(a: Int, b: Int): Int =
if (a == 0 || b == 0) 0 else (a / gcd(a, b) * b).abs

// Buffer size in bits: the least common multiple of the two widths, so one
// buffer holds a whole number of input words and a whole number of output words.
var buf_size = lcm(input_width, output_width)

// Number of input words needed to fill one buffer.
var total_inputs = buf_size/input_width
// Number of output words read out of one full buffer.
var total_outputs = buf_size/output_width

// Prints the derived sizes (inputs, outputs, buffer bits) when the module is constructed.
println("%d, %d, %d".format(total_inputs, total_outputs, buf_size))

// Module interface: a valid-qualified input word stream, a valid-qualified
// output word stream, and debug outputs mirroring the internal registers.
val io = new Bundle {

// Input side: input_data carries one input_width-bit word, qualified by input_valid.
val input_valid = Bool(INPUT)
val input_data = UInt(INPUT, input_width)

// Output side: output_data carries one output_width-bit word, qualified by output_valid.
// Each port is declared exactly once: the flag is a Bool, the data word a UInt.
val output_valid = Bool(OUTPUT)
val output_data = UInt(OUTPUT, output_width)

// Debug: value of the buffer-select register.
val dbg_current = Bool(OUTPUT)

// Debug: word counters for the input and output side.
val dbg_inputs_performed = UInt(OUTPUT)
val dbg_outputs_performed = UInt(OUTPUT)

// Debug: batch-complete flags for the input and output side.
val dbg_inputs_finished = Bool(OUTPUT)
val dbg_outputs_finished = Bool(OUTPUT)

// Debug: raw contents of the two buffers.
val dbg_buf1 = UInt(OUTPUT)
val dbg_buf2 = UInt(OUTPUT)
}

var buf_size = 0

if(input_width > output_width){
buf_size = (input_width + (input_width - output_width))
// Two buf_size-bit buffers. The `current` register decides which one is
// written by the input side and which one is read by the output side.
val buffer1 = Reg(init=UInt(0, buf_size))
val buffer2 = Reg(init=UInt(0, buf_size))

// Batch-complete flags. outputs_finished resets to true and inputs_finished
// to false; a swap happens only when both are true.
val inputs_finished = Reg(init=Bool(false))
val outputs_finished = Reg(init=Bool(true))

// Word counters within the current batch.
// NOTE(review): both are 8 bits wide, so total_inputs / total_outputs above 256
// would not be representable - confirm the width combinations in use stay below that.
val inputs_performed = Reg(init=UInt(0, 8))
val outputs_performed = Reg(init=UInt(0, 8))

// Buffer select: when true, inputs are written to buffer1 and outputs are
// read from buffer2; when false the roles are reversed.
val current = Reg(init=Bool(false))

// Expose the internal registers on the debug ports.
io.dbg_current := current
io.dbg_inputs_performed := inputs_performed
io.dbg_outputs_performed := outputs_performed
io.dbg_inputs_finished := inputs_finished
io.dbg_outputs_finished := outputs_finished
io.dbg_buf1 := buffer1
io.dbg_buf2 := buffer2

// Input slot writes. The Scala loop unrolls into one guarded write per slot:
// when an input is valid and inputs_performed equals i, the word is stored in
// slot i of the buffer selected by `current` (buffer1 when true, buffer2 otherwise).
// Slots are filled from the most significant end downwards: slot i occupies bits
// [(total_inputs - i)*input_width - 1 : (total_inputs - i - 1)*input_width].
for(i <- 0 until total_inputs){
when(io.input_valid){
when(inputs_performed === UInt(i)){
when(current){
// Earlier layout (least significant slot first), kept for reference:
// buffer1( (i+1)*input_width - 1, (i)*input_width) := io.input_data
buffer1( (total_inputs - i)*input_width - 1, (total_inputs - i - 1)*input_width) := io.input_data
}
.otherwise{
// Earlier layout (least significant slot first), kept for reference:
// buffer2( (i+1)*input_width - 1, (i)*input_width) := io.input_data
buffer2( (total_inputs - i)*input_width - 1, (total_inputs - i - 1)*input_width) := io.input_data
}
}
}
}
else{
buf_size = 2*output_width

// Input word counter: advances on every valid input. On the last word of a
// batch (index total_inputs - 1) it wraps to 0 and marks the input side finished.
when(io.input_valid){
when(inputs_performed === UInt(total_inputs - 1)){
inputs_performed := UInt(0)
inputs_finished := Bool(true)
}
.otherwise{
inputs_performed := inputs_performed + UInt(1)
}
}


// Way overshoots but thats OK
val buffer = Reg(init=UInt(0, 128))
val top = Reg(init=UInt(0, 8))

// We have three cases to consider:
//
// #1: input is valid and there is not enough data for a valid output
// #2: input is valid and we have enough data to output
// #3: neither in or out is valid
// Output word select. The Scala loop unrolls into one guarded read per word:
// while the output side is not finished, output_valid is asserted and word
// number outputs_performed is driven onto output_data. Words are taken from
// the buffer that is NOT selected for input (buffer2 when `current` is true,
// buffer1 otherwise), least significant word first:
// word i is bits [(i+1)*output_width - 1 : i*output_width].
for(i <- 0 until total_outputs){
when(!outputs_finished){
io.output_valid := Bool(true)

when(outputs_performed === UInt(i)){
when(current){
io.output_data := buffer2((i+1)*output_width - 1, (i)*output_width)
}
.otherwise{
io.output_data := buffer1((i+1)*output_width - 1, (i)*output_width)
}
}
}
}

// Output word counter and idle defaults.
when(!outputs_finished){
// Last word of the batch: mark the output side finished.
// NOTE(review): outputs_performed is not reset to 0 in this branch - confirm it is
// cleared somewhere before the next batch, otherwise the next read starts at the last index.
when(outputs_performed === UInt(total_outputs - 1)){
outputs_finished := Bool(true)
}
.otherwise{
outputs_performed := outputs_performed + UInt(1)
}
}
.otherwise{
// Nothing to output: deassert valid and drive 57005 (0xDEAD),
// presumably as an easily recognisable filler value.
io.output_valid := Bool(false)
io.output_data := UInt(57005)
}

// Buffer swap: once the input side has filled its buffer and the output side
// has drained the other one, toggle `current` and clear both finished flags so
// the next batch can start.
when(outputs_finished && inputs_finished){
current := ~current
outputs_finished := Bool(false)
inputs_finished := Bool(false)

// An input word arriving on the swap cycle itself is stored in the low
// input_width bits of the buffer that becomes the input buffer after the
// toggle (`current` still reads its pre-toggle value here, hence ~current),
// and the input side is marked finished again.
// NOTE(review): this writes the lowest slot, while the per-slot write loop
// fills from the most significant slot downwards, and the input counter also
// reacts to the same input_valid - confirm the intended interaction for
// total_inputs > 1.
when(io.input_valid){
// inputs_finished is a Bool register, so use a Bool literal like every other assignment to it.
inputs_finished := Bool(true)

when(~current){
buffer1(input_width - 1, 0) := io.input_data
}
.otherwise{
buffer2(input_width - 1, 0) := io.input_data
}
}
}
}

// Tester for InputTranslator. Drives a valid input word on every even cycle
// and an idle cycle on every odd one, and peeks the debug, input and output
// ports each cycle. It only prints/peeks values; it contains no expect() checks.
class TranslatorTest(c: InputTranslator) extends Tester(c) {

// Commented-out sketch of an earlier single-buffer approach, kept as found:
// when(io.input_valid){
// // 1
// when(top > UInt(output_width)){
// top := top + UInt(input_width)
// buffer := buffer + (input_data << top)
// }
// // 2
// when(top <= UInt(output_width)){
// output_data := UInt(123)
// top := top + UInt(input_width - output_width)
// buffer(0, UInt(output
//
// }
// Start with one idle cycle (no valid input).
poke(c.io.input_valid, false)
step(1)

// NOTE(review): the next two assignments use `io` directly rather than `c.io`
// and look like leftover module-side defaults - confirm whether they belong in this tester.
io.output_data := UInt(0)
io.output_valid := Bool(false)
println("Slow translate test")
// Cycles 1..24.
for(i <- 1 until 25){
// Even cycle: present a valid word. The printed label is the hex form of the
// poked value: 13398 = 0x3456, 22034 = 0x5612, 4660 = 0x1234.
if(i%2 == 0){
if(i%6 == 0){
println("3456")
poke(c.io.input_data, 13398)
poke(c.io.input_valid, true)
}else if(i%4 == 0){
println("5612")
poke(c.io.input_data, 22034)
poke(c.io.input_valid, true)
}else{
println("1234")
poke(c.io.input_data, 4660)
poke(c.io.input_valid, true)
}
}
// Odd cycle: no valid input.
else{
poke(c.io.input_data, 0)
poke(c.io.input_valid, false)
}
println()
println()
// Dump the buffer-select flag and both buffers.
println("STATE")
peek(c.io.dbg_current)
peek(c.io.dbg_buf1)
peek(c.io.dbg_buf2)
// Dump the input-side counter, flag and the values just poked.
println("IN")
peek(c.io.dbg_inputs_performed)
peek(c.io.dbg_inputs_finished)
peek(c.io.input_valid)
peek(c.io.input_data)
// Dump the output-side counter, flag and the current output word.
println("OUT")
peek(c.io.dbg_outputs_performed)
peek(c.io.dbg_outputs_finished)
peek(c.io.output_valid)
peek(c.io.output_data)
println()
// Advance the simulation by one clock cycle.
step(1)
println()
}
}

@@ -4,24 +4,24 @@ import Chisel._

class KernelController(data_width: Int, kernel_dim: Int) extends Module {

val inactive_kernels = (kernel_dim/2)*2 // In case of even numbered kernel
val inactive_kernels = kernel_dim - 1
val total_kernels = kernel_dim*kernel_dim

val io = new Bundle {
val kernel_valid = Bool(INPUT)
val kernel_in = UInt(INPUT, data_width)

val input_valid = Bool(INPUT)
val data_in = UInt(INPUT, data_width)

val kernel_stage = Bool(INPUT)
val stall = Bool(INPUT)

val kernel_out = UInt(OUTPUT, data_width)
val freeze_kernels = Bool(OUTPUT)
val stall_alu = Bool(OUTPUT)
}

val kernel_buffer = Vec.fill(inactive_kernels){ Reg(init=SInt(0, width=data_width)) }
val kernel_count = Reg(init=UInt(0, 32))
val instruction_mode :: sleep :: Nil = Enum(UInt(), 2)
val state = Reg(init=UInt(instruction_mode))


// Wish I was better at scala
def propagate_kernels(): Unit = { for(i <- 1 until inactive_kernels){
@@ -30,32 +30,24 @@ class KernelController(data_width: Int, kernel_dim: Int) extends Module {
}


io.freeze_kernels := Bool(false)
io.stall_alu := Bool(false)
io.kernel_out := UInt(57005)



// When in instruction mode we want to feed the kernel chain
when(state === instruction_mode){
when(io.kernel_valid){
when(io.kernel_stage){
when(io.input_valid){
kernel_count := kernel_count + UInt(1)
kernel_buffer(0) := io.kernel_in
kernel_buffer(0) := io.data_in
propagate_kernels()
}
.otherwise{
io.freeze_kernels := Bool(true)
}

when(kernel_count === UInt(total_kernels - 1)){
state := sleep
io.stall_alu := Bool(true)
}
}


io.kernel_out := UInt(57005)

when(state === sleep){
.elsewhen(!io.stall){
propagate_kernels()
io.kernel_out := kernel_buffer(inactive_kernels - 1)
kernel_buffer(0) := io.kernel_in
}

}
@@ -10,14 +10,19 @@ class Mapper(data_width: Int) extends Module {

val io = new Bundle {

val get_map_instruction = Bool(INPUT)

val pixel_in = UInt(INPUT, data_width)
val kernel_in = SInt(INPUT, data_width)
val stall = Bool(INPUT)

val mapped_pixel = SInt(OUTPUT, data_width)
val kernel_out = SInt(OUTPUT, data_width)

}

val instruction = Reg(UInt(0, 4))

val kernel = Reg(UInt(width=data_width))

val color1 = io.pixel_in(7,0)
@@ -26,6 +31,12 @@ class Mapper(data_width: Int) extends Module {

io.kernel_out := UInt(57005)
when(!io.stall){

when(io.get_map_instruction){
instruction := io.kernel_in
io.mapped_pixel := io.kernel_in
}

kernel := io.kernel_in
io.kernel_out := kernel
}
@@ -8,6 +8,7 @@ class Orchestrator(val cols: Int, val rows: Int) extends Module {

val io = new Bundle {
val stall = Bool(INPUT)
val reset = Bool(INPUT)

val read_row = Vec.fill(rows){ Bool(OUTPUT) }
val mux_row = Vec.fill(rows){ Bool(OUTPUT) }
@@ -90,15 +91,17 @@ class Orchestrator(val cols: Int, val rows: Int) extends Module {
println("Period of system: %d".format(period))

// count
when(!io.stall){
when(io.reset){
time := UInt(0)
}
.elsewhen(!io.stall){
when(time === UInt(period - 1)){
time := UInt(0)
}.otherwise{
time := time + UInt(1)
}
}

// Ping row read and mux TODO default in a for loop maybe bad?
for(i <- 0 until rows){
println("Adding row read at time %d".format(rowreads(i)))
when(time === UInt(rowreads(i))){
@@ -9,8 +9,16 @@ class Processor(data_width: Int, val cols: Int, rows: Int, kernel_dim: Int) exte

val stall = Bool(INPUT)

val input_ready = Bool(INPUT)
val input_valid = Bool(INPUT)
val data_in = UInt(INPUT, data_width)
val processor_sleep = Bool(INPUT)

val stage = new Bundle {
val data_stage = Bool(INPUT)
val kernel_stage = Bool(INPUT)
val reduce_stage = Bool(INPUT)
val map_stage = Bool(INPUT)
}

val ALU_data_out = UInt(OUTPUT, data_width)
val ALU_data_is_valid = Bool(OUTPUT)
@@ -34,12 +42,11 @@ class Processor(data_width: Int, val cols: Int, rows: Int, kernel_dim: Int) exte
ALUs.io.selector_shift := processor_control.io.ALU_shift
ALUs.io.accumulator_flush := processor_control.io.accumulator_flush
ALUs.io.kernel_in := kernel_control.io.kernel_out
ALUs.io.stall := kernel_control.io.stall
ALUs.io.stall := io.stall
ALUs.io.stall := (kernel_control.io.stall_alu || io.stall)

kernel_control.io.kernel_in := io.data_in
kernel_control.io.kernel_in := ALUs.io.kernel_out
kernel_control.io.stall := io.stall
kernel_control.io.kernel_valid := io.input_ready
kernel_control.io.kernel_stage := io.stage.kernel_stage

processor_control.io.stall := io.stall

@@ -50,7 +57,7 @@ class Processor(data_width: Int, val cols: Int, rows: Int, kernel_dim: Int) exte
class ConveyorTest(c: Processor) extends Tester(c) {

poke(c.io.stall, true)
poke(c.io.input_ready, true)
poke(c.io.input_valid, true)

for(cycle <- 0 until 6){
for(i <- 0 until c.cols){
@@ -75,7 +82,7 @@ class ConveyorTest(c: Processor) extends Tester(c) {
class ProcessorTest(c: Processor) extends Tester(c) {

poke(c.io.stall, true)
poke(c.io.input_ready, true)
poke(c.io.input_valid, true)

for(cycle <- 0 until 6){
for(i <- 0 until c.cols){
@@ -8,14 +8,10 @@ import TidbitsOCM._
// Controller module for the processor. As visible here it only declares its
// interface and a three-value state enumeration; no logic is attached yet and
// the alu_stall output is not driven in this body.
class ProcessorController(data_width: Int, cols: Int, rows: Int, kernel_dims: Int) extends Module{

val io = new Bundle {

// Grouped control inputs; currently only a reset flag.
val control_signals = new Bundle {
val reset = Bool(INPUT)
}

// Input-valid flag in, ALU stall request out.
val input_valid = Bool(INPUT)
val alu_stall = Bool(OUTPUT)
}

// State encoding with three values: programming_mode, data_mode and sleep.
val programming_mode :: data_mode :: sleep :: Nil = Enum(UInt(), 3)


}
@@ -8,6 +8,9 @@ import Chisel._
class Reducer(data_width: Int) extends Module {

val io = new Bundle {

val get_reduce_instruction = Bool(INPUT)

val mapped_pixel = SInt(INPUT, data_width)
val flush = Bool(INPUT)
val stall = Bool(INPUT)
@@ -16,20 +19,19 @@ class Reducer(data_width: Int) extends Module {
val valid_out = Bool(OUTPUT)
}

val instruction = Reg(UInt(0, 24))
val accumulator = Reg(init=SInt(0, data_width))
when(io.flush){
accumulator := io.mapped_pixel
}
.otherwise{
accumulator := accumulator + io.mapped_pixel
}

val color1 = io.mapped_pixel(7,0)
val color2 = io.mapped_pixel(15,8)
val color3 = io.mapped_pixel(23,16)

when(!io.stall){
when(io.flush){

when(io.get_reduce_instruction){
instruction := io.mapped_pixel
}
.elsewhen(io.flush){
accumulator(7, 0) := color1
accumulator(15, 8) := color2
accumulator(23, 16) := color3
@@ -6,7 +6,7 @@ import Chisel._
import TidbitsOCM._


class SliceDoubleBuffer(val row_length: Int, data_width: Int, kernel_dim: Int) extends Module {
class SliceDoubleBuffer(val row_length: Int, input_data_width: Int, pixel_data_width: Int, kernel_dim: Int) extends Module {

val cols = kernel_dim*kernel_dim
val total_reads = row_length*cols
@@ -3,14 +3,17 @@ package Core
import Chisel._
import TidbitsOCM._

class Tile(img_width: Int, input_data_width: Int, data_width: Int, cols: Int, rows: Int) extends Module{
class Tile(img_width: Int, control_data_width: Int, pixel_data_width: Int, HDMI_data_width: Int, cols: Int, rows: Int) extends Module{

val kernel_dim = rows
val img_height = 480

val io = new Bundle {
val data_in = UInt(INPUT, data_width)
val input_valid = Bool(INPUT)
val control_data_in = UInt(INPUT, control_data_width)
val control_input_valid = Bool(INPUT)

val hdmi_data_in = UInt(INPUT, HDMI_data_width)
val hdmi_input_valid = Bool(INPUT)

val reset = Bool(INPUT)
val active = Bool(INPUT) //Not used, but wired
@@ -31,8 +34,14 @@ class Tile(img_width: Int, input_data_width: Int, data_width: Int, cols: Int, ro

// Processor processes data. Incredible
Processor.io.data_in := InputHandler.io.data_out

// Controller takes the output of the processor and checks if it is valid
Processor.io.processor_sleep := SystemControl.io.processor_sleep
Processor.io.stage.data_stage := SystemControl.io.stage.data_stage
Processor.io.stage.kernel_stage := SystemControl.io.stage.kernel_stage
Processor.io.stage.reduce_stage := SystemControl.io.stage.reduce_stage
Processor.io.stage.map_stage := SystemControl.io.stage.map_stage

// Controller checks input and output for the processor, determining validity.
// Handles instructing the processor
SystemControl.io.processor_input_is_valid := InputHandler.io.data_ready
SystemControl.io.ALU_output := Processor.io.ALU_data_out
SystemControl.io.ALU_output_is_valid := Processor.io.ALU_data_is_valid
@@ -145,16 +154,6 @@ class InputTest(c: Tile) extends Tester(c) {

}











// Placeholder tester for Tile: the body is empty, so it applies no stimulus and performs no checks.
class TileTest(c: Tile) extends Tester(c) {
}